]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
rec: Fix the cache cleaning code being only run once for workers
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
31 #endif
32 #include "ws-recursor.hh"
33 #include <thread>
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
36 #include "utility.hh"
37 #include "dns_random.hh"
38 #ifdef HAVE_LIBSODIUM
39 #include <sodium.h>
40 #endif
41 #include "opensslsigners.hh"
42 #include <iostream>
43 #include <errno.h>
44 #include <boost/static_assert.hpp>
45 #include <map>
46 #include <set>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
49 #include <stdio.h>
50 #include <signal.h>
51 #include <stdlib.h>
52 #include "misc.hh"
53 #include "mtasker.hh"
54 #include <utility>
55 #include "arguments.hh"
56 #include "syncres.hh"
57 #include <fcntl.h>
58 #include <fstream>
59 #include "sortlist.hh"
60 #include "sstuff.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
66 #ifdef MALLOC_TRACE
67 #include "malloctrace.hh"
68 #endif
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
76 #include "logger.hh"
77 #include "iputils.hh"
78 #include "mplexer.hh"
79 #include "config.h"
80 #include "lua-recursor4.hh"
81 #include "version.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
84 #include "dnsname.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
90 #include "gettime.hh"
91 #include "pubsuffix.hh"
92 #ifdef NOD_ENABLED
93 #include "nod.hh"
94 #endif /* NOD_ENABLED */
95
96 #include "rec-protobuf.hh"
97 #include "rec-snmp.hh"
98
99 #ifdef HAVE_SYSTEMD
100 #include <systemd/sd-daemon.h>
101 #endif
102
103 #include "namespaces.hh"
104
105 #ifdef HAVE_PROTOBUF
106 #include "uuid-utils.hh"
107 #endif
108
109 #include "xpf.hh"
110
111 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
113 static thread_local std::shared_ptr<RecursorLua4> t_pdl;
114 static thread_local unsigned int t_id = 0;
115 static thread_local std::shared_ptr<Regex> t_traceRegex;
116 static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
117 #ifdef HAVE_PROTOBUF
118 static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
119 static thread_local uint64_t t_protobufServersGeneration;
120 static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
121 static thread_local uint64_t t_outgoingProtobufServersGeneration;
122 #endif /* HAVE_PROTOBUF */
123
124 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
125 thread_local std::unique_ptr<MemRecursorCache> t_RC;
126 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
127 thread_local FDMultiplexer* t_fdm{nullptr};
128 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
129 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
130 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
131 #ifdef NOD_ENABLED
132 thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
133 thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
134 #endif /* NOD_ENABLED */
135 __thread struct timeval g_now; // timestamp, updated (too) frequently
136
137 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
138
139 // for communicating with our threads
140 // effectively readonly after startup
141 struct RecThreadInfo
142 {
143 struct ThreadPipeSet
144 {
145 int writeToThread{-1};
146 int readToThread{-1};
147 int writeFromThread{-1};
148 int readFromThread{-1};
149 int writeQueriesToThread{-1}; // this one is non-blocking
150 int readQueriesToThread{-1};
151 };
152
153 /* FD corresponding to TCP sockets this thread is listening
154 on.
155 These FDs are also in deferredAdds when we have one
156 socket per listener, and in g_deferredAdds instead. */
157 std::set<int> tcpSockets;
158 /* FD corresponding to listening sockets if we have one socket per
159 listener (with reuseport), otherwise all listeners share the
160 same FD and g_deferredAdds is then used instead */
161 deferredAdd_t deferredAdds;
162 struct ThreadPipeSet pipes;
163 std::thread thread;
164 MT_t* mt{nullptr};
165 uint64_t numberOfDistributedQueries{0};
166 /* handle the web server, carbon, statistics and the control channel */
167 bool isHandler{false};
168 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
169 bool isListener{false};
170 /* process queries */
171 bool isWorker{false};
172 };
173
174 /* first we have the handler thread, t_id == 0 (some other
175 helper threads like SNMP might have t_id == 0 as well)
176 then the distributor threads if any
177 and finally the workers */
178 static std::vector<RecThreadInfo> s_threadInfos;
179 /* without reuseport, all listeners share the same sockets */
180 static deferredAdd_t g_deferredAdds;
181
182 typedef vector<int> tcpListenSockets_t;
183 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
184
185 static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
186 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
187 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
188 static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
189 static AtomicCounter counter;
190 static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
191 static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
192 static NetmaskGroup g_XPFAcl;
193 static size_t g_tcpMaxQueriesPerConn;
194 static size_t s_maxUDPQueriesPerRound;
195 static uint64_t g_latencyStatSize;
196 static uint32_t g_disthashseed;
197 static unsigned int g_maxTCPPerClient;
198 static unsigned int g_maxMThreads;
199 static unsigned int g_numDistributorThreads;
200 static unsigned int g_numWorkerThreads;
201 static int g_tcpTimeout;
202 static uint16_t g_udpTruncationThreshold;
203 static uint16_t g_xpfRRCode{0};
204 static std::atomic<bool> statsWanted;
205 static std::atomic<bool> g_quiet;
206 static bool g_logCommonErrors;
207 static bool g_anyToTcp;
208 static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
209 static bool g_reusePort{false};
210 static bool g_gettagNeedsEDNSOptions{false};
211 static time_t g_statisticsInterval;
212 static bool g_useIncomingECS;
213 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
214 #ifdef NOD_ENABLED
215 static bool g_nodEnabled;
216 static DNSName g_nodLookupDomain;
217 static bool g_nodLog;
218 static SuffixMatchNode g_nodDomainWL;
219 static std::string g_nod_pbtag;
220 static bool g_udrEnabled;
221 static bool g_udrLog;
222 static std::string g_udr_pbtag;
223 #endif /* NOD_ENABLED */
224 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
225 static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
226 #else
227 static std::set<uint16_t> s_avoidUdpSourcePorts;
228 #endif
229 static uint16_t s_minUdpSourcePort;
230 static uint16_t s_maxUdpSourcePort;
231 static double s_balancingFactor;
232
233 RecursorControlChannel s_rcc; // only active in the handler thread
234 RecursorStats g_stats;
235 string s_programname="pdns_recursor";
236 string s_pidfname;
237 bool g_lowercaseOutgoing;
238 unsigned int g_networkTimeoutMsec;
239 unsigned int g_numThreads;
240 uint16_t g_outgoingEDNSBufsize;
241 bool g_logRPZChanges{false};
242
243 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
244 #define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
245 // Bad Nets taken from both:
246 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
247 // and
248 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
249 // where such a network may not be considered a valid destination
250 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
251 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
252
253 //! used to send information to a newborn mthread
254 struct DNSComboWriter {
255 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
256 {
257 }
258
259 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
260 {
261 }
262
263 void setRemote(const ComboAddress& sa)
264 {
265 d_remote=sa;
266 }
267
268 void setSource(const ComboAddress& sa)
269 {
270 d_source=sa;
271 }
272
273 void setLocal(const ComboAddress& sa)
274 {
275 d_local=sa;
276 }
277
278 void setDestination(const ComboAddress& sa)
279 {
280 d_destination=sa;
281 }
282
283 void setSocket(int sock)
284 {
285 d_socket=sock;
286 }
287
288 string getRemote() const
289 {
290 if (d_source == d_remote) {
291 return d_source.toStringWithPort();
292 }
293 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
294 }
295
296 MOADNSParser d_mdp;
297 struct timeval d_now;
298 /* Remote client, might differ from d_source
299 in case of XPF, in which case d_source holds
300 the IP of the client and d_remote of the proxy
301 */
302 ComboAddress d_remote;
303 ComboAddress d_source;
304 /* Destination address, might differ from
305 d_destination in case of XPF, in which case
306 d_destination holds the IP of the proxy and
307 d_local holds our own. */
308 ComboAddress d_local;
309 ComboAddress d_destination;
310 #ifdef HAVE_PROTOBUF
311 boost::uuids::uuid d_uuid;
312 string d_requestorId;
313 string d_deviceId;
314 #endif
315 std::string d_query;
316 std::vector<std::string> d_policyTags;
317 LuaContext::LuaObject d_data;
318 EDNSSubnetOpts d_ednssubnet;
319 shared_ptr<TCPConnection> d_tcpConnection;
320 int d_socket;
321 unsigned int d_tag{0};
322 uint32_t d_qhash{0};
323 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
324 uint16_t d_ecsBegin{0};
325 uint16_t d_ecsEnd{0};
326 bool d_variable{false};
327 bool d_ecsFound{false};
328 bool d_ecsParsed{false};
329 bool d_tcp;
330 };
331
332 MT_t* getMT()
333 {
334 return MT ? MT.get() : nullptr;
335 }
336
337 ArgvMap &arg()
338 {
339 static ArgvMap theArg;
340 return theArg;
341 }
342
343 unsigned int getRecursorThreadId()
344 {
345 return t_id;
346 }
347
348 int getMTaskerTID()
349 {
350 return MT->getTid();
351 }
352
353 static bool isDistributorThread()
354 {
355 if (t_id == 0) {
356 return false;
357 }
358
359 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
360 }
361
362 static bool isHandlerThread()
363 {
364 if (t_id == 0) {
365 return true;
366 }
367
368 return s_threadInfos.at(t_id).isHandler;
369 }
370
371 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
372
373 // -1 is error, 0 is timeout, 1 is success
374 int asendtcp(const string& data, Socket* sock)
375 {
376 PacketID pident;
377 pident.sock=sock;
378 pident.outMSG=data;
379
380 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
381 string packet;
382
383 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
384
385 if(!ret || ret==-1) { // timeout
386 t_fdm->removeWriteFD(sock->getHandle());
387 }
388 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
389 return -1;
390 }
391 return ret;
392 }
393
394 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
395
396 // -1 is error, 0 is timeout, 1 is success
397 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
398 {
399 data.clear();
400 PacketID pident;
401 pident.sock=sock;
402 pident.inNeeded=len;
403 pident.inIncompleteOkay=incompleteOkay;
404 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
405
406 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
407 if(!ret || ret==-1) { // timeout
408 t_fdm->removeReadFD(sock->getHandle());
409 }
410 else if(data.empty()) {// error, EOF or other
411 return -1;
412 }
413
414 return ret;
415 }
416
417 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
418 {
419 PacketID pident=*any_cast<PacketID>(&var);
420 char resp[512];
421 ComboAddress fromaddr;
422 socklen_t addrlen=sizeof(fromaddr);
423
424 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
425 if (fromaddr != pident.remote) {
426 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
427
428 }
429
430 t_fdm->removeReadFD(fd);
431 if(ret >= 0) {
432 string data(resp, (size_t) ret);
433 MT->sendEvent(pident, &data);
434 }
435 else {
436 string empty;
437 MT->sendEvent(pident, &empty);
438 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
439 }
440 }
441 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
442 {
443 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
444 s.setNonBlocking();
445 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
446
447 s.bind(local);
448 s.connect(dest);
449 s.send(query);
450
451 PacketID pident;
452 pident.sock=&s;
453 pident.remote=dest;
454 pident.type=0;
455 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
456
457 string data;
458
459 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
460
461 if(!ret || ret==-1) { // timeout
462 t_fdm->removeReadFD(s.getHandle());
463 }
464 else if(data.empty()) {// error, EOF or other
465 // we could special case this
466 return data;
467 }
468 return data;
469 }
470
471 //! pick a random query local address
472 ComboAddress getQueryLocalAddress(int family, uint16_t port)
473 {
474 ComboAddress ret;
475 if(family==AF_INET) {
476 if(g_localQueryAddresses4.empty())
477 ret = g_local4;
478 else
479 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
480 ret.sin4.sin_port = htons(port);
481 }
482 else {
483 if(g_localQueryAddresses6.empty())
484 ret = g_local6;
485 else
486 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
487
488 ret.sin6.sin6_port = htons(port);
489 }
490 return ret;
491 }
492
493 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
494
495 static void setSocketBuffer(int fd, int optname, uint32_t size)
496 {
497 uint32_t psize=0;
498 socklen_t len=sizeof(psize);
499
500 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
501 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
502 return;
503 }
504
505 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
506 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
507 }
508
509
510 static void setSocketReceiveBuffer(int fd, uint32_t size)
511 {
512 setSocketBuffer(fd, SO_RCVBUF, size);
513 }
514
515 static void setSocketSendBuffer(int fd, uint32_t size)
516 {
517 setSocketBuffer(fd, SO_SNDBUF, size);
518 }
519
520
521 // you can ask this class for a UDP socket to send a query from
522 // this socket is not yours, don't even think about deleting it
523 // but after you call 'returnSocket' on it, don't assume anything anymore
524 class UDPClientSocks
525 {
526 unsigned int d_numsocks;
527 public:
528 UDPClientSocks() : d_numsocks(0)
529 {
530 }
531
532 typedef set<int> socks_t;
533 socks_t d_socks;
534
535 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
536 int getSocket(const ComboAddress& toaddr, int* fd)
537 {
538 *fd=makeClientSocket(toaddr.sin4.sin_family);
539 if(*fd < 0) // temporary error - receive exception otherwise
540 return -2;
541
542 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
543 int err = errno;
544 // returnSocket(*fd);
545 try {
546 closesocket(*fd);
547 }
548 catch(const PDNSException& e) {
549 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
550 }
551
552 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
553 return -2;
554 return -1;
555 }
556
557 d_socks.insert(*fd);
558 d_numsocks++;
559 return 0;
560 }
561
562 void returnSocket(int fd)
563 {
564 socks_t::iterator i=d_socks.find(fd);
565 if(i==d_socks.end()) {
566 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
567 }
568 returnSocketLocked(i);
569 }
570
571 // return a socket to the pool, or simply erase it
572 void returnSocketLocked(socks_t::iterator& i)
573 {
574 if(i==d_socks.end()) {
575 throw PDNSException("Trying to return a socket not in the pool");
576 }
577 try {
578 t_fdm->removeReadFD(*i);
579 }
580 catch(FDMultiplexerException& e) {
581 // we sometimes return a socket that has not yet been assigned to t_fdm
582 }
583 try {
584 closesocket(*i);
585 }
586 catch(const PDNSException& e) {
587 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
588 }
589
590 d_socks.erase(i++);
591 --d_numsocks;
592 }
593
594 // returns -1 for errors which might go away, throws for ones that won't
595 static int makeClientSocket(int family)
596 {
597 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
598
599 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
600 return ret;
601
602 if(ret<0)
603 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
604
605 // setCloseOnExec(ret); // we're not going to exec
606
607 int tries=10;
608 ComboAddress sin;
609 while(--tries) {
610 uint16_t port;
611
612 if(tries==1) // fall back to kernel 'random'
613 port = 0;
614 else {
615 do {
616 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
617 }
618 while (s_avoidUdpSourcePorts.count(port));
619 }
620
621 sin=getQueryLocalAddress(family, port); // does htons for us
622
623 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
624 break;
625 }
626 if(!tries)
627 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
628
629 setReceiveSocketErrors(ret, family);
630 setNonBlocking(ret);
631 return ret;
632 }
633 };
634
635 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
636
637 /* these two functions are used by LWRes */
638 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
639 int asendto(const char *data, size_t len, int flags,
640 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
641 {
642
643 PacketID pident;
644 pident.domain = domain;
645 pident.remote = toaddr;
646 pident.type = qtype;
647
648 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
649 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
650
651 for(; chain.first != chain.second; chain.first++) {
652 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
653 /*
654 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
655 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
656 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
657 */
658 chain.first->key.chain.insert(id); // we can chain
659 *fd=-1; // gets used in waitEvent / sendEvent later on
660 return 1;
661 }
662 }
663
664 int ret=t_udpclientsocks->getSocket(toaddr, fd);
665 if(ret < 0)
666 return ret;
667
668 pident.fd=*fd;
669 pident.id=id;
670
671 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
672 ret = send(*fd, data, len, 0);
673
674 int tmp = errno;
675
676 if(ret < 0)
677 t_udpclientsocks->returnSocket(*fd);
678
679 errno = tmp; // this is for logging purposes only
680 return ret;
681 }
682
683 // -1 is error, 0 is timeout, 1 is success
684 int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
685 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
686 {
687 static optional<unsigned int> nearMissLimit;
688 if(!nearMissLimit)
689 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
690
691 PacketID pident;
692 pident.fd=fd;
693 pident.id=id;
694 pident.domain=domain;
695 pident.type = qtype;
696 pident.remote=fromaddr;
697
698 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
699
700 if(ret > 0) {
701 if(packet.empty()) // means "error"
702 return -1;
703
704 *d_len=packet.size();
705
706 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
707 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
708 g_stats.spoofCount++;
709 return -1;
710 }
711 }
712 else {
713 if(fd >= 0)
714 t_udpclientsocks->returnSocket(fd);
715 }
716 return ret;
717 }
718
719 static void writePid(void)
720 {
721 if(!::arg().mustDo("write-pid"))
722 return;
723 ofstream of(s_pidfname.c_str(), std::ios_base::app);
724 if(of)
725 of<< Utility::getpid() <<endl;
726 else
727 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
728 }
729
730 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
731 {
732 ++s_currentConnections;
733 (*t_tcpClientCounts)[d_remote]++;
734 }
735
736 TCPConnection::~TCPConnection()
737 {
738 try {
739 if(closesocket(d_fd) < 0)
740 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
741 }
742 catch(const PDNSException& e) {
743 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
744 }
745
746 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
747 t_tcpClientCounts->erase(d_remote);
748 --s_currentConnections;
749 }
750
751 AtomicCounter TCPConnection::s_currentConnections;
752
753 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
754
755 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
756 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
757 {
758 if(packetsize > 1000 && t_largeanswerremotes)
759 t_largeanswerremotes->push_back(remote);
760 switch(res) {
761 case RCode::ServFail:
762 if(t_servfailremotes) {
763 t_servfailremotes->push_back(remote);
764 if(query && t_servfailqueryring) // packet cache
765 t_servfailqueryring->push_back(make_pair(*query, qtype));
766 }
767 g_stats.servFails++;
768 break;
769 case RCode::NXDomain:
770 g_stats.nxDomains++;
771 break;
772 case RCode::NoError:
773 g_stats.noErrors++;
774 break;
775 }
776 }
777
778 static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
779 try
780 {
781 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
782 }
783 catch(...)
784 {
785 return "Exception making error message for exception";
786 }
787
788 #ifdef HAVE_PROTOBUF
789 static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
790 {
791 if (!t_protobufServers) {
792 return;
793 }
794
795 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
796 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
797 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
798 message.setServerIdentity(SyncRes::s_serverID);
799 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
800 message.setRequestorId(requestorId);
801 message.setDeviceId(deviceId);
802
803 if (!policyTags.empty()) {
804 message.setPolicyTags(policyTags);
805 }
806
807 // cerr <<message.toDebugString()<<endl;
808 std::string str;
809 message.serialize(str);
810
811 for (auto& server : *t_protobufServers) {
812 server->queueData(str);
813 }
814 }
815
816 static void protobufLogResponse(const RecProtoBufMessage& message)
817 {
818 if (!t_protobufServers) {
819 return;
820 }
821
822 // cerr <<message.toDebugString()<<endl;
823 std::string str;
824 message.serialize(str);
825
826 for (auto& server : *t_protobufServers) {
827 server->queueData(str);
828 }
829 }
830 #endif
831
832 /**
833 * Chases the CNAME provided by the PolicyCustom RPZ policy.
834 *
835 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
836 * @param qtype: The QType of the original query
837 * @param sr: A SyncRes
838 * @param res: An integer that will contain the RCODE of the lookup we do
839 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
840 */
841 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
842 {
843 if (spoofed.d_type == QType::CNAME) {
844 bool oldWantsRPZ = sr.getWantsRPZ();
845 sr.setWantsRPZ(false);
846 vector<DNSRecord> ans;
847 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
848 for (const auto& rec : ans) {
849 if(rec.d_place == DNSResourceRecord::ANSWER) {
850 ret.push_back(rec);
851 }
852 }
853 // Reset the RPZ state of the SyncRes
854 sr.setWantsRPZ(oldWantsRPZ);
855 }
856 }
857
858 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
859 {
860 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
861
862 if(rec.d_type != QType::OPT) // their TTL ain't real
863 minTTL = min(minTTL, rec.d_ttl);
864
865 rec.d_content->toPacket(pw);
866 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
867 pw.rollback();
868 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
869 pw.getHeader()->tc=1;
870 pw.truncate();
871 }
872 return false;
873 }
874
875 return true;
876 }
877
878 #ifdef HAVE_PROTOBUF
879 static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
880 {
881 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
882
883 for (const auto& server : config.servers) {
884 try {
885 result->emplace_back(new RemoteLogger(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
886 }
887 catch(const std::exception& e) {
888 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
889 }
890 catch(const PDNSException& e) {
891 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
892 }
893 }
894
895 return result;
896 }
897
898 static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
899 {
900 if (!luaconfsLocal->protobufExportConfig.enabled) {
901 if (t_protobufServers) {
902 for (auto& server : *t_protobufServers) {
903 server->stop();
904 }
905 t_protobufServers.reset();
906 }
907
908 return false;
909 }
910
911 /* if the server was not running, or if it was running according to a
912 previous configuration */
913 if (!t_protobufServers ||
914 t_protobufServersGeneration < luaconfsLocal->generation) {
915
916 if (t_protobufServers) {
917 for (auto& server : *t_protobufServers) {
918 server->stop();
919 }
920 }
921 t_protobufServers.reset();
922
923 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
924 t_protobufServersGeneration = luaconfsLocal->generation;
925 }
926
927 return true;
928 }
929
930 static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
931 {
932 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
933 if (t_outgoingProtobufServers) {
934 for (auto& server : *t_outgoingProtobufServers) {
935 server->stop();
936 }
937 }
938 t_outgoingProtobufServers.reset();
939
940 return false;
941 }
942
943 /* if the server was not running, or if it was running according to a
944 previous configuration */
945 if (!t_outgoingProtobufServers ||
946 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
947
948 if (t_outgoingProtobufServers) {
949 for (auto& server : *t_outgoingProtobufServers) {
950 server->stop();
951 }
952 }
953 t_outgoingProtobufServers.reset();
954
955 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
956 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
957 }
958
959 return true;
960 }
961 #endif /* HAVE_PROTOBUF */
962
963 #ifdef NOD_ENABLED
964 static bool nodCheckNewDomain(const DNSName& dname)
965 {
966 static const QType qt(QType::A);
967 static const uint16_t qc(QClass::IN);
968 bool ret = false;
969 // First check the (sub)domain isn't whitelisted for NOD purposes
970 if (!g_nodDomainWL.check(dname)) {
971 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
972 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
973 if (g_nodLog) {
974 // This should probably log to a dedicated log file
975 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
976 }
977 if (!(g_nodLookupDomain.isRoot())) {
978 // Send a DNS A query to <domain>.g_nodLookupDomain
979 DNSName qname = dname;
980 vector<DNSRecord> dummy;
981 qname += g_nodLookupDomain;
982 directResolve(qname, qt, qc, dummy);
983 }
984 ret = true;
985 }
986 }
987 return ret;
988 }
989
990 static void nodAddDomain(const DNSName& dname)
991 {
992 // Don't bother adding domains on the nod whitelist
993 if (!g_nodDomainWL.check(dname)) {
994 if (t_nodDBp) {
995 // This keeps the nod info up to date
996 t_nodDBp->addDomain(dname);
997 }
998 }
999 }
1000
1001 static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1002 {
1003 bool ret = false;
1004 if (record.d_place == DNSResourceRecord::ANSWER ||
1005 record.d_place == DNSResourceRecord::ADDITIONAL) {
1006 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1007 std::stringstream ss;
1008 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1009 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
1010 if (g_udrLog) {
1011 // This should also probably log to a dedicated file.
1012 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
1013 }
1014 ret = true;
1015 }
1016 }
1017 return ret;
1018 }
1019 #endif /* NOD_ENABLED */
1020
/* Answers a single client query.
   `p` points to a DNSComboWriter; ownership is taken here by wrapping it in a
   unique_ptr. The stages visible below are: reading the EDNS options of the
   query, running the Lua hooks and the RPZ policy checks, resolving through
   SyncRes, applying the DNSSEC validation state, building the response
   packet, sending it over UDP or TCP, and finally updating the statistics.
   Two labels structure the flow: `haveAnswer` (an outcome and records are
   available) and `sendit` (the packet only needs its EDNS part and sending).
   Exceptions raised along the way are caught and logged at the bottom. */
1021 static void startDoResolve(void *p)
1022 {
1023 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
1024 try {
1025 if (t_queryring)
1026 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1027
// Default answer size limit: 65535 for TCP; for UDP, 512 capped by g_udpTruncationThreshold.
// It is recomputed below when the query carries EDNS.
1028 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
1029 EDNSOpts edo;
1030 std::vector<pair<uint16_t, string> > ednsOpts;
1031 bool variableAnswer = dc->d_variable;
1032 bool haveEDNS=false;
1033 #ifdef NOD_ENABLED
1034 bool hasUDR = false;
1035 #endif /* NOD_ENABLED */
1036 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1037 uint8_t ednsExtRCode = 0;
1038 if(getEDNSOpts(dc->d_mdp, &edo)) {
1039 haveEDNS=true;
1040 if (edo.d_version != 0) {
1041 ednsExtRCode = ERCode::BADVERS;
1042 }
1043
1044 if(!dc->d_tcp) {
1045 /* rfc6891 6.2.3:
1046 "Values lower than 512 MUST be treated as equal to 512."
1047 */
1048 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1049 }
1050 ednsOpts = edo.d_options;
1051 maxanswersize -= 11; // EDNS header size
1052
1053 for (const auto& o : edo.d_options) {
1054 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1055 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1056 } else if (o.first == EDNSOptionCode::NSID) {
1057 const static string mode_server_id = ::arg()["server-id"];
1058 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1059 maxanswersize > (2 + 2 + mode_server_id.size())) {
1060 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1061 variableAnswer = true; // Can't packetcache an answer with NSID
1062 // Option Code and Option Length are both 2
1063 maxanswersize -= 2 + 2 + mode_server_id.size();
1064 }
1065 }
1066 }
1067 }
1068 /* perhaps there was no EDNS or no ECS but by now we looked */
1069 dc->d_ecsParsed = true;
1070 vector<DNSRecord> ret;
1071 vector<uint8_t> packet;
1072
1073 auto luaconfsLocal = g_luaconfs.getLocal();
1074 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1075 bool wantsRPZ(true);
1076 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
1077 bool logResponse = false;
1078 #ifdef HAVE_PROTOBUF
1079 if (checkProtobufExport(luaconfsLocal)) {
1080 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
1081 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1082 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1083 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
1084 pbMessage->setServerIdentity(SyncRes::s_serverID);
1085 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1086 }
1087 #endif /* HAVE_PROTOBUF */
1088
1089 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
1090
1091 pw.getHeader()->aa=0;
1092 pw.getHeader()->ra=1;
1093 pw.getHeader()->qr=1;
1094 pw.getHeader()->tc=0;
1095 pw.getHeader()->id=dc->d_mdp.d_header.id;
1096 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
1097 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
1098
1099 /* This is the lowest TTL seen in the records of the response,
1100 so we can't cache it for longer than this value.
1101 If we have a TTL cap, this value can't be larger than the
1102 cap no matter what. */
1103 uint32_t minTTL = dc->d_ttlCap;
1104
1105 SyncRes sr(dc->d_now);
1106
1107 bool DNSSECOK=false;
1108 if(t_pdl) {
1109 sr.setLuaEngine(t_pdl);
1110 }
1111 if(g_dnssecmode != DNSSECMode::Off) {
1112 sr.setDoDNSSEC(true);
1113
1114 // Does the requestor want DNSSEC records?
1115 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
1116 DNSSECOK=true;
1117 g_stats.dnssecQueries++;
1118 }
1119 if (dc->d_mdp.d_header.cd) {
1120 /* Per rfc6840 section 5.9, "When processing a request with
1121 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1122 to return all response data, even data that has failed DNSSEC
1123 validation. */
1124 ++g_stats.dnssecCheckDisabledQueries;
1125 }
1126 if (dc->d_mdp.d_header.ad) {
1127 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1128 indicating that the requester understands and is interested in the
1129 value of the AD bit in the response. This allows a requester to
1130 indicate that it understands the AD bit without also requesting
1131 DNSSEC data via the DO bit. */
1132 ++g_stats.dnssecAuthenticDataQueries;
1133 }
1134 } else {
1135 // Ignore the client-set CD flag
1136 pw.getHeader()->cd=0;
1137 }
1138 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1139
1140 #ifdef HAVE_PROTOBUF
1141 sr.setInitialRequestId(dc->d_uuid);
1142 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
1143 #endif
1144
1145 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
1146
1147 bool tracedQuery=false; // we could consider letting Lua know about this too
1148 bool shouldNotValidate = false;
1149
1150 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1151 int res = RCode::NoError;
1152 DNSFilterEngine::Policy appliedPolicy;
1153 std::vector<DNSRecord> spoofed;
1154 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
1155 dq.ednsFlags = &edo.d_extFlags;
1156 dq.ednsOptions = &ednsOpts;
1157 dq.tag = dc->d_tag;
1158 dq.discardedPolicies = &sr.d_discardedPolicies;
1159 dq.policyTags = &dc->d_policyTags;
1160 dq.appliedPolicy = &appliedPolicy;
1161 dq.currentRecords = &ret;
1162 dq.dh = &dc->d_mdp.d_header;
1163 dq.data = dc->d_data;
1164 #ifdef HAVE_PROTOBUF
1165 dq.requestorId = dc->d_requestorId;
1166 dq.deviceId = dc->d_deviceId;
1167 #endif
1168
// A non-zero extended rcode (set above when the EDNS version is not 0) skips
// resolution entirely and goes straight to sending the reply.
1169 if(ednsExtRCode != 0) {
1170 goto sendit;
1171 }
1172
// ANY queries over UDP while g_anyToTcp is set: reply with TC=1 and no records,
// and flag the answer as variable.
1173 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
1174 pw.getHeader()->tc = 1;
1175 res = 0;
1176 variableAnswer = true;
1177 goto sendit;
1178 }
1179
1180 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
1181 sr.setLogMode(SyncRes::Store);
1182 tracedQuery=true;
1183 }
1184
1185
1186 if(!g_quiet || tracedQuery) {
1187 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
1188 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
1189 if(!dc->d_ednssubnet.source.empty()) {
1190 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
1191 }
1192 g_log<<endl;
1193 }
1194
1195 sr.setId(MT->getTid());
1196 if(!dc->d_mdp.d_header.rd)
1197 sr.setCacheOnly();
1198
1199 if (t_pdl) {
1200 t_pdl->prerpz(dq, res);
1201 }
1202
1203 // Check if the query has a policy attached to it
1204 if (wantsRPZ) {
1205 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
1206 }
1207
1208 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1209 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
1210
1211 sr.setWantsRPZ(wantsRPZ);
1212 if(wantsRPZ) {
1213 switch(appliedPolicy.d_kind) {
1214 case DNSFilterEngine::PolicyKind::NoAction:
1215 break;
// Drop: leave without sending any reply
1216 case DNSFilterEngine::PolicyKind::Drop:
1217 g_stats.policyDrops++;
1218 g_stats.policyResults[appliedPolicy.d_kind]++;
1219 return;
1220 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1221 g_stats.policyResults[appliedPolicy.d_kind]++;
1222 res=RCode::NXDomain;
1223 goto haveAnswer;
1224 case DNSFilterEngine::PolicyKind::NODATA:
1225 g_stats.policyResults[appliedPolicy.d_kind]++;
1226 res=RCode::NoError;
1227 goto haveAnswer;
1228 case DNSFilterEngine::PolicyKind::Custom:
1229 g_stats.policyResults[appliedPolicy.d_kind]++;
1230 res=RCode::NoError;
1231 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1232 for (const auto& dr : spoofed) {
1233 ret.push_back(dr);
1234 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1235 }
1236 goto haveAnswer;
1237 case DNSFilterEngine::PolicyKind::Truncate:
1238 if(!dc->d_tcp) {
1239 g_stats.policyResults[appliedPolicy.d_kind]++;
1240 res=RCode::NoError;
1241 pw.getHeader()->tc=1;
1242 goto haveAnswer;
1243 }
1244 break;
1245 }
1246 }
1247
1248 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
1249 try {
1250 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
1251 shouldNotValidate = sr.wasOutOfBand();
1252 }
1253 catch(ImmediateServFailException &e) {
1254 if(g_logCommonErrors)
1255 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
1256 res = RCode::ServFail;
1257 }
1258
1259 dq.validationState = sr.getValidationState();
1260
1261 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1262 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1263 appliedPolicy = sr.d_appliedPolicy;
1264 g_stats.policyResults[appliedPolicy.d_kind]++;
1265 switch(appliedPolicy.d_kind) {
1266 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1267 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1268 case DNSFilterEngine::PolicyKind::Drop:
1269 g_stats.policyDrops++;
1270 return;
1271 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1272 ret.clear();
1273 res=RCode::NXDomain;
1274 goto haveAnswer;
1275
1276 case DNSFilterEngine::PolicyKind::NODATA:
1277 ret.clear();
1278 res=RCode::NoError;
1279 goto haveAnswer;
1280
1281 case DNSFilterEngine::PolicyKind::Truncate:
1282 if(!dc->d_tcp) {
1283 ret.clear();
1284 res=RCode::NoError;
1285 pw.getHeader()->tc=1;
1286 goto haveAnswer;
1287 }
1288 break;
1289
1290 case DNSFilterEngine::PolicyKind::Custom:
1291 ret.clear();
1292 res=RCode::NoError;
1293 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1294 for (const auto& dr : spoofed) {
1295 ret.push_back(dr);
1296 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1297 }
1298 goto haveAnswer;
1299 }
1300 }
1301
1302 if (wantsRPZ) {
1303 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
1304 }
1305
// Lua hooks: nodata (NoError without a record of the queried type in the answer
// section), nxdomain, then postresolve. A hook returning true disables validation.
1306 if(t_pdl) {
1307 if(res == RCode::NoError) {
1308 auto i=ret.cbegin();
1309 for(; i!= ret.cend(); ++i)
1310 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1311 break;
1312 if(i == ret.cend() && t_pdl->nodata(dq, res))
1313 shouldNotValidate = true;
1314
1315 }
1316 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
1317 shouldNotValidate = true;
1318
1319 if(t_pdl->postresolve(dq, res))
1320 shouldNotValidate = true;
1321 }
1322
1323 if (wantsRPZ) { //XXX This block is repeated, see above
1324 g_stats.policyResults[appliedPolicy.d_kind]++;
1325 switch(appliedPolicy.d_kind) {
1326 case DNSFilterEngine::PolicyKind::NoAction:
1327 break;
1328 case DNSFilterEngine::PolicyKind::Drop:
1329 g_stats.policyDrops++;
1330 return;
1331 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1332 ret.clear();
1333 res=RCode::NXDomain;
1334 goto haveAnswer;
1335
1336 case DNSFilterEngine::PolicyKind::NODATA:
1337 ret.clear();
1338 res=RCode::NoError;
1339 goto haveAnswer;
1340
1341 case DNSFilterEngine::PolicyKind::Truncate:
1342 if(!dc->d_tcp) {
1343 ret.clear();
1344 res=RCode::NoError;
1345 pw.getHeader()->tc=1;
1346 goto haveAnswer;
1347 }
1348 break;
1349
1350 case DNSFilterEngine::PolicyKind::Custom:
1351 ret.clear();
1352 res=RCode::NoError;
1353 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1354 for (const auto& dr : spoofed) {
1355 ret.push_back(dr);
1356 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1357 }
1358 goto haveAnswer;
1359 }
1360 }
1361 }
// Common exit of the policy/resolve stage: 'res' holds the outcome (an rcode, -1
// or PolicyDecision::DROP) and 'ret' the records collected so far.
1362 haveAnswer:;
1363 if(res == PolicyDecision::DROP) {
1364 g_stats.policyDrops++;
1365 return;
1366 }
1367 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1368 {
1369 string trace(sr.getTrace());
1370 if(!trace.empty()) {
1371 vector<string> lines;
1372 boost::split(lines, trace, boost::is_any_of("\n"));
1373 for(const string& line : lines) {
1374 if(!line.empty())
1375 g_log<<Logger::Warning<< line << endl;
1376 }
1377 }
1378 }
1379
// An outcome of -1 is turned into a SERVFAIL reply that carries no records
1380 if(res == -1) {
1381 pw.getHeader()->rcode=RCode::ServFail;
1382 // no commit here, because no record
1383 g_stats.servFails++;
1384 }
1385 else {
1386 pw.getHeader()->rcode=res;
1387
1388 // Does the validation mode or query demand validation?
1389 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1390 try {
1391 if(sr.doLog()) {
1392 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
1393 }
1394
1395 auto state = sr.getValidationState();
1396
1397 if(state == Secure) {
1398 if(sr.doLog()) {
1399 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
1400 }
1401
1402 // Is the query source interested in the value of the ad-bit?
1403 if (dc->d_mdp.d_header.ad || DNSSECOK)
1404 pw.getHeader()->ad=1;
1405 }
1406 else if(state == Insecure) {
1407 if(sr.doLog()) {
1408 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
1409 }
1410
1411 pw.getHeader()->ad=0;
1412 }
1413 else if(state == Bogus) {
1414 if(t_bogusremotes)
1415 t_bogusremotes->push_back(dc->d_source);
1416 if(t_bogusqueryring)
1417 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1418 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1419 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
1420 }
1421
1422 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1423 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1424 if(sr.doLog()) {
1425 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1426 }
1427
1428 pw.getHeader()->rcode=RCode::ServFail;
1429 goto sendit;
1430 } else {
1431 if(sr.doLog()) {
1432 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1433 }
1434 }
1435 }
1436 }
1437 catch(ImmediateServFailException &e) {
1438 if(g_logCommonErrors)
1439 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1440 pw.getHeader()->rcode=RCode::ServFail;
1441 goto sendit;
1442 }
1443 }
1444
1445 if(ret.size()) {
1446 orderAndShuffle(ret);
1447 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
1448 stable_sort(ret.begin(), ret.end(), *sl);
1449 variableAnswer=true;
1450 }
1451 }
1452
1453 bool needCommit = false;
1454 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
// When DNSSECOK is not set, leave out NSEC3 records, and leave out RRSIG/NSEC
// records unless they sit in the answer section and match the queried type
// (or the query type is ANY).
1455 if( ! DNSSECOK &&
1456 ( i->d_type == QType::NSEC3 ||
1457 (
1458 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1459 (
1460 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1461 i->d_place != DNSResourceRecord::ANSWER
1462 )
1463 )
1464 )
1465 ) {
1466 continue;
1467 }
1468
// addRecordToPacket() returning false stops the loop; needCommit is cleared so
// that no commit happens after the loop.
1469 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1470 needCommit = false;
1471 break;
1472 }
1473 needCommit = true;
1474
1475 #ifdef NOD_ENABLED
1476 bool udr = false;
1477 if (g_udrEnabled) {
1478 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
1479 if (!hasUDR && udr)
1480 hasUDR = true;
1481 }
1482 #endif /* NOD ENABLED */
1483
1484 #ifdef HAVE_PROTOBUF
1485 if (t_protobufServers) {
1486 #ifdef NOD_ENABLED
1487 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1488 #else
1489 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
1490 #endif /* NOD_ENABLED */
1491 }
1492 #endif
1493 }
1494 if(needCommit)
1495 pw.commit();
1496 }
// Final stage: add the EDNS part of the reply, account for the response, send it
// and update the statistics.
1497 sendit:;
1498
1499 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
1500 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
1501 EDNSSubnetOpts eo;
1502 eo.source = dc->d_ednssubnet.source;
1503 ComboAddress sa;
1504 sa.reset();
1505 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1506 eo.scope = Netmask(sa, 0);
1507
1508 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1509 }
1510
1511 if (haveEDNS) {
1512 /* we try to add the EDNS OPT RR even for truncated answers,
1513 as rfc6891 states:
1514 "The minimal response MUST be the DNS header, question section, and an
1515 OPT record. This MUST also occur when a truncated response (using
1516 the DNS header's TC bit) is returned."
1517 */
1518 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1519 pw.commit();
1520 }
1521
1522 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1523 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1524 #ifdef NOD_ENABLED
1525 bool nod = false;
1526 if (g_nodEnabled) {
1527 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1528 nod = true;
1529 }
1530 #endif /* NOD_ENABLED */
1531 #ifdef HAVE_PROTOBUF
1532 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
1533 pbMessage->setBytes(packet.size());
1534 pbMessage->setResponseCode(pw.getHeader()->rcode);
1535 if (appliedPolicy.d_name) {
1536 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1537 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
1538 }
1539 pbMessage->setPolicyTags(dc->d_policyTags);
1540 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1541 pbMessage->setRequestorId(dq.requestorId);
1542 pbMessage->setDeviceId(dq.deviceId);
1543 #ifdef NOD_ENABLED
1544 if (g_nodEnabled) {
1545 if (nod) {
1546 pbMessage->setNOD(true);
1547 pbMessage->addPolicyTag(g_nod_pbtag);
1548 }
1549 if (hasUDR) {
1550 pbMessage->addPolicyTag(g_udr_pbtag);
1551 }
1552 }
1553 #endif /* NOD_ENABLED */
1554 protobufLogResponse(*pbMessage);
// The NOD/UDR markers are removed again after logging; pbMessage may still be
// handed to the packet cache further down.
1555 #ifdef NOD_ENABLED
1556 if (g_nodEnabled) {
1557 pbMessage->setNOD(false);
1558 pbMessage->clearUDR();
1559 if (nod)
1560 pbMessage->removePolicyTag(g_nod_pbtag);
1561 if (hasUDR)
1562 pbMessage->removePolicyTag(g_udr_pbtag);
1563 }
1564 #endif /* NOD_ENABLED */
1565 }
1566 #endif
// UDP: send the reply with sendmsg() and, unless the answer is variable or the
// packet cache is disabled, store it in the packet cache.
1567 if(!dc->d_tcp) {
1568 struct msghdr msgh;
1569 struct iovec iov;
1570 char cbuf[256];
1571 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1572 msgh.msg_control=NULL;
1573
1574 if(g_fromtosockets.count(dc->d_socket)) {
1575 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1576 }
1577 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1578 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
1579
1580 if(variableAnswer || sr.wasVariable()) {
1581 g_stats.variableResponses++;
1582 }
1583 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1584 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1585 string((const char*)&*packet.begin(), packet.size()),
1586 g_now.tv_sec,
1587 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1588 min(minTTL,SyncRes::s_packetcachettl),
1589 dq.validationState,
1590 dc->d_ecsBegin,
1591 dc->d_ecsEnd,
1592 std::move(pbMessage));
1593 }
1594 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1595 }
1596 else {
// TCP: the reply is preceded by its size as two bytes, most significant first
1597 char buf[2];
1598 buf[0]=packet.size()/256;
1599 buf[1]=packet.size()%256;
1600
1601 Utility::iovec iov[2];
1602
1603 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1604 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1605
1606 int wret=Utility::writev(dc->d_socket, iov, 2);
1607 bool hadError=true;
1608
1609 if(wret == 0)
1610 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1611 else if(wret < 0 )
1612 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1613 else if((unsigned int)wret != 2 + packet.size())
1614 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1615 else
1616 hadError=false;
1617
1618 // update tcp connection status, either by closing or moving to 'BYTE0'
1619
1620 if(hadError) {
1621 // no need to remove us from FDM, we weren't there
1622 dc->d_socket = -1;
1623 }
1624 else {
1625 dc->d_tcpConnection->queriesCount++;
1626 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1627 dc->d_socket = -1;
1628 }
1629 else {
1630 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1631 Utility::gettimeofday(&g_now, 0); // needs to be updated
1632 struct timeval ttd = g_now;
1633 ttd.tv_sec += g_tcpTimeout;
1634
1635 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
1636 }
1637 }
1638 }
// 'spent' is the total handling time in seconds (it is multiplied by 1000 to
// print milliseconds below)
1639 float spent=makeFloat(sr.getNow()-dc->d_now);
1640 if(!g_quiet) {
1641 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1642 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1643 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1644 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1645
1646 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1647 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
1648 }
1649
1650 g_log<<endl;
1651
1652 }
1653
// Counted as a cache miss as soon as any outgoing or auth-zone query was made
1654 if (sr.d_outqueries || sr.d_authzonequeries) {
1655 t_RC->cacheMisses++;
1656 }
1657 else {
1658 t_RC->cacheHits++;
1659 }
1660
// Response time histogram, bucketed on the total time in seconds
1661 if(spent < 0.001)
1662 g_stats.answers0_1++;
1663 else if(spent < 0.010)
1664 g_stats.answers1_10++;
1665 else if(spent < 0.1)
1666 g_stats.answers10_100++;
1667 else if(spent < 1.0)
1668 g_stats.answers100_1000++;
1669 else
1670 g_stats.answersSlow++;
1671
// Moving average of the latency in microseconds, with each sample capped at the
// network timeout
1672 uint64_t newLat=(uint64_t)(spent*1000000);
1673 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1674 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1675 // no worries, we do this for packet cache hits elsewhere
1676
// 'ourtime' is the total time minus the time accounted in sr.d_totUsec
1677 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1678 if(ourtime < 1)
1679 g_stats.ourtime0_1++;
1680 else if(ourtime < 2)
1681 g_stats.ourtime1_2++;
1682 else if(ourtime < 4)
1683 g_stats.ourtime2_4++;
1684 else if(ourtime < 8)
1685 g_stats.ourtime4_8++;
1686 else if(ourtime < 16)
1687 g_stats.ourtime8_16++;
1688 else if(ourtime < 32)
1689 g_stats.ourtime16_32++;
1690 else {
1691 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1692 g_stats.ourtimeSlow++;
1693 }
1694 if(ourtime >= 0.0) {
1695 newLat=ourtime*1000; // usec
1696 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1697 }
1698 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1699 }
// An exception ends the handling of this query: it is logged here and not rethrown
1700 catch(PDNSException &ae) {
1701 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1702 }
1703 catch(const MOADNSException &mde) {
1704 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
1705 }
1706 catch(std::exception& e) {
1707 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1708
1709 // Luawrapper nests the exception from Lua, so we unnest it here
1710 try {
1711 std::rethrow_if_nested(e);
1712 } catch(const std::exception& ne) {
1713 g_log<<". Extra info: "<<ne.what();
1714 } catch(...) {}
1715
1716 g_log<<endl;
1717 }
1718 catch(...) {
1719 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1720 }
1721
// Remember the largest stack usage reported by MT so far
1722 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1723 }
1724
1725 static void makeControlChannelSocket(int processNum=-1)
1726 {
1727 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1728 if(processNum >= 0)
1729 sockname += "."+std::to_string(processNum);
1730 sockname+=".controlsocket";
1731 s_rcc.listen(sockname);
1732
1733 int sockowner = -1;
1734 int sockgroup = -1;
1735
1736 if (!::arg().isEmpty("socket-group"))
1737 sockgroup=::arg().asGid("socket-group");
1738 if (!::arg().isEmpty("socket-owner"))
1739 sockowner=::arg().asUid("socket-owner");
1740
1741 if (sockgroup > -1 || sockowner > -1) {
1742 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1743 unixDie("Failed to chown control socket");
1744 }
1745 }
1746
1747 // do mode change if socket-mode is given
1748 if(!::arg().isEmpty("socket-mode")) {
1749 mode_t sockmode=::arg().asMode("socket-mode");
1750 if(chmod(sockname.c_str(), sockmode) < 0) {
1751 unixDie("Failed to chmod control socket");
1752 }
1753 }
1754 }
1755
1756 static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1757 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
1758 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
1759 {
1760 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
1761 const bool lookForECS = ednssubnet != nullptr;
1762 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
1763 size_t questionLen = question.length();
1764 unsigned int consumed=0;
1765 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1766
1767 size_t pos= sizeof(dnsheader)+consumed+4;
1768 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1769 const uint16_t arcount = ntohs(dh->arcount);
1770
1771 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1772 if (question.at(pos) != 0) {
1773 /* not an OPT or a XPF, bye. */
1774 return;
1775 }
1776
1777 pos += 1;
1778 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1779 pos += sizeof(dnsrecordheader);
1780
1781 if (pos >= questionLen) {
1782 return;
1783 }
1784
1785 /* OPT root label (1) followed by type (2) */
1786 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
1787 if (!options) {
1788 char* ecsStart = nullptr;
1789 size_t ecsLen = 0;
1790 /* we need to pass the record len */
1791 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1792 if (res == 0 && ecsLen > 4) {
1793 EDNSSubnetOpts eso;
1794 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1795 *ednssubnet=eso;
1796 foundECS = true;
1797 }
1798 }
1799 }
1800 else {
1801 /* we need to pass the record len */
1802 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
1803 if (res == 0) {
1804 const auto& it = options->find(EDNSOptionCode::ECS);
1805 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
1806 EDNSSubnetOpts eso;
1807 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
1808 *ednssubnet=eso;
1809 foundECS = true;
1810 }
1811 }
1812 }
1813 }
1814 }
1815 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
1816 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1817 return;
1818 }
1819
1820 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1821 }
1822
1823 pos += ntohs(drh->d_clen);
1824 }
1825 }
1826
1827 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1828 {
1829 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1830
1831 if(conn->state==TCPConnection::BYTE0) {
1832 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
1833 if(bytes==1)
1834 conn->state=TCPConnection::BYTE1;
1835 if(bytes==2) {
1836 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1837 conn->data.resize(conn->qlen);
1838 conn->bytesread=0;
1839 conn->state=TCPConnection::GETQUESTION;
1840 }
1841 if(!bytes || bytes < 0) {
1842 t_fdm->removeReadFD(fd);
1843 return;
1844 }
1845 }
1846 else if(conn->state==TCPConnection::BYTE1) {
1847 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
1848 if(bytes==1) {
1849 conn->state=TCPConnection::GETQUESTION;
1850 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1851 conn->data.resize(conn->qlen);
1852 conn->bytesread=0;
1853 }
1854 if(!bytes || bytes < 0) {
1855 if(g_logCommonErrors)
1856 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
1857 t_fdm->removeReadFD(fd);
1858 return;
1859 }
1860 }
1861 else if(conn->state==TCPConnection::GETQUESTION) {
1862 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
1863 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1864 if(g_logCommonErrors) {
1865 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1866 }
1867 t_fdm->removeReadFD(fd);
1868 return;
1869 }
1870 conn->bytesread+=(uint16_t)bytes;
1871 if(conn->bytesread==conn->qlen) {
1872 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1873
1874 std::unique_ptr<DNSComboWriter> dc;
1875 try {
1876 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
1877 }
1878 catch(const MOADNSException &mde) {
1879 g_stats.clientParseError++;
1880 if(g_logCommonErrors)
1881 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
1882 return;
1883 }
1884 dc->d_tcpConnection = conn; // carry the torch
1885 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1886 dc->d_tcp=true;
1887 dc->setRemote(conn->d_remote);
1888 dc->setSource(conn->d_remote);
1889 ComboAddress dest;
1890 dest.reset();
1891 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1892 socklen_t len = dest.getSocklen();
1893 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1894 dc->setLocal(dest);
1895 dc->setDestination(dest);
1896 DNSName qname;
1897 uint16_t qtype=0;
1898 uint16_t qclass=0;
1899 bool needECS = false;
1900 bool needXPF = g_XPFAcl.match(conn->d_remote);
1901 string requestorId;
1902 string deviceId;
1903 bool logQuery = false;
1904 #ifdef HAVE_PROTOBUF
1905 auto luaconfsLocal = g_luaconfs.getLocal();
1906 if (checkProtobufExport(luaconfsLocal)) {
1907 needECS = true;
1908 }
1909 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
1910 #endif
1911
1912 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
1913
1914 try {
1915 EDNSOptionViewMap ednsOptions;
1916 bool xpfFound = false;
1917 dc->d_ecsParsed = true;
1918 dc->d_ecsFound = false;
1919 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
1920 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1921 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
1922
1923 if(t_pdl) {
1924 try {
1925 if (t_pdl->d_gettag_ffi) {
1926 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
1927 }
1928 else if (t_pdl->d_gettag) {
1929 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1930 }
1931 }
1932 catch(const std::exception& e) {
1933 if(g_logCommonErrors)
1934 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1935 }
1936 }
1937 }
1938 catch(const std::exception& e)
1939 {
1940 if(g_logCommonErrors)
1941 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1942 }
1943 }
1944
1945 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1946
1947 #ifdef HAVE_PROTOBUF
1948 if(t_protobufServers || t_outgoingProtobufServers) {
1949 dc->d_requestorId = requestorId;
1950 dc->d_deviceId = deviceId;
1951 dc->d_uuid = getUniqueID();
1952 }
1953
1954 if(t_protobufServers) {
1955 try {
1956
1957 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
1958 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1959 }
1960 }
1961 catch(std::exception& e) {
1962 if(g_logCommonErrors)
1963 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1964 }
1965 }
1966 #endif
1967 if(t_pdl) {
1968 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
1969 if(!g_quiet)
1970 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
1971 g_stats.policyDrops++;
1972 return;
1973 }
1974 }
1975
1976 if(dc->d_mdp.d_header.qr) {
1977 g_stats.ignoredCount++;
1978 if(g_logCommonErrors) {
1979 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1980 }
1981 return;
1982 }
1983 if(dc->d_mdp.d_header.opcode) {
1984 g_stats.ignoredCount++;
1985 if(g_logCommonErrors) {
1986 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1987 }
1988 return;
1989 }
1990 else if (dh->qdcount == 0) {
1991 g_stats.emptyQueriesCount++;
1992 if(g_logCommonErrors) {
1993 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
1994 }
1995 return;
1996 }
1997 else {
1998 ++g_stats.qcounter;
1999 ++g_stats.tcpqcounter;
2000 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
2001 return;
2002 }
2003 }
2004 }
2005 }
2006
2007 //! Handle new incoming TCP connection
2008 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
2009 {
2010 ComboAddress addr;
2011 socklen_t addrlen=sizeof(addr);
2012 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
2013 if(newsock>=0) {
2014 if(MT->numProcesses() > g_maxMThreads) {
2015 g_stats.overCapacityDrops++;
2016 try {
2017 closesocket(newsock);
2018 }
2019 catch(const PDNSException& e) {
2020 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
2021 }
2022 return;
2023 }
2024
2025 if(t_remotes)
2026 t_remotes->push_back(addr);
2027 if(t_allowFrom && !t_allowFrom->match(&addr)) {
2028 if(!g_quiet)
2029 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2030
2031 g_stats.unauthorizedTCP++;
2032 try {
2033 closesocket(newsock);
2034 }
2035 catch(const PDNSException& e) {
2036 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
2037 }
2038 return;
2039 }
2040 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
2041 g_stats.tcpClientOverflow++;
2042 try {
2043 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2044 }
2045 catch(const PDNSException& e) {
2046 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
2047 }
2048 return;
2049 }
2050
2051 setNonBlocking(newsock);
2052 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
2053 tc->state=TCPConnection::BYTE0;
2054
2055 struct timeval ttd;
2056 Utility::gettimeofday(&ttd, 0);
2057 ttd.tv_sec += g_tcpTimeout;
2058
2059 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
2060 }
2061 }
2062
2063 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
2064 {
2065 gettimeofday(&g_now, 0);
2066 struct timeval diff = g_now - tv;
2067 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
2068
2069 if(tv.tv_sec && delta > 1000.0) {
2070 g_stats.tooOldDrops++;
2071 return 0;
2072 }
2073
2074 ++g_stats.qcounter;
2075 if(fromaddr.sin4.sin_family==AF_INET6)
2076 g_stats.ipv6qcounter++;
2077
2078 string response;
2079 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
2080 unsigned int ctag=0;
2081 uint32_t qhash = 0;
2082 bool needECS = false;
2083 bool needXPF = g_XPFAcl.match(fromaddr);
2084 std::vector<std::string> policyTags;
2085 LuaContext::LuaObject data;
2086 ComboAddress source = fromaddr;
2087 ComboAddress destination = destaddr;
2088 string requestorId;
2089 string deviceId;
2090 bool logQuery = false;
2091 #ifdef HAVE_PROTOBUF
2092 boost::uuids::uuid uniqueId;
2093 auto luaconfsLocal = g_luaconfs.getLocal();
2094 if (checkProtobufExport(luaconfsLocal)) {
2095 uniqueId = getUniqueID();
2096 needECS = true;
2097 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
2098 uniqueId = getUniqueID();
2099 }
2100 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2101 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2102 #endif
2103 EDNSSubnetOpts ednssubnet;
2104 bool ecsFound = false;
2105 bool ecsParsed = false;
2106 uint16_t ecsBegin = 0;
2107 uint16_t ecsEnd = 0;
2108 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2109 bool variable = false;
2110 try {
2111 DNSName qname;
2112 uint16_t qtype=0;
2113 uint16_t qclass=0;
2114 uint32_t age;
2115 bool qnameParsed=false;
2116 #ifdef MALLOC_TRACE
2117 /*
2118 static uint64_t last=0;
2119 if(!last)
2120 g_mtracer->clearAllocators();
2121 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2122 last=g_mtracer->getAllocs();
2123 cout<<g_mtracer->topAllocatorsString()<<endl;
2124 g_mtracer->clearAllocators();
2125 */
2126 #endif
2127
2128 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
2129 try {
2130 EDNSOptionViewMap ednsOptions;
2131 bool xpfFound = false;
2132
2133 ecsFound = false;
2134
2135 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2136 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2137 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2138
2139 qnameParsed = true;
2140 ecsParsed = true;
2141
2142 if(t_pdl) {
2143 try {
2144 if (t_pdl->d_gettag_ffi) {
2145 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
2146 }
2147 else if (t_pdl->d_gettag) {
2148 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2149 }
2150 }
2151 catch(const std::exception& e) {
2152 if(g_logCommonErrors)
2153 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2154 }
2155 }
2156 }
2157 catch(const std::exception& e)
2158 {
2159 if(g_logCommonErrors)
2160 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2161 }
2162 }
2163
2164 bool cacheHit = false;
2165 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
2166 #ifdef HAVE_PROTOBUF
2167 if (t_protobufServers) {
2168 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
2169 pbMessage->setServerIdentity(SyncRes::s_serverID);
2170 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
2171 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
2172 }
2173 }
2174 #endif /* HAVE_PROTOBUF */
2175
2176 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2177 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2178 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2179 vState valState;
2180 if (qnameParsed) {
2181 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2182 }
2183 else {
2184 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2185 }
2186
2187 if (cacheHit) {
2188 if(valState == Bogus) {
2189 if(t_bogusremotes)
2190 t_bogusremotes->push_back(source);
2191 if(t_bogusqueryring)
2192 t_bogusqueryring->push_back(make_pair(qname, qtype));
2193 }
2194
2195 #ifdef HAVE_PROTOBUF
2196 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
2197 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2198 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
2199 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2200 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2201 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2202 pbMessage->setRequestorId(requestorId);
2203 pbMessage->setDeviceId(deviceId);
2204 protobufLogResponse(*pbMessage);
2205 }
2206 #endif /* HAVE_PROTOBUF */
2207 if(!g_quiet)
2208 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
2209
2210 g_stats.packetCacheHits++;
2211 SyncRes::s_queries++;
2212 ageDNSPacket(response, age);
2213 struct msghdr msgh;
2214 struct iovec iov;
2215 char cbuf[256];
2216 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2217 msgh.msg_control=NULL;
2218
2219 if(g_fromtosockets.count(fd)) {
2220 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
2221 }
2222 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
2223 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
2224
2225 if(response.length() >= sizeof(struct dnsheader)) {
2226 struct dnsheader tmpdh;
2227 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
2228 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
2229 }
2230 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
2231 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
2232 return 0;
2233 }
2234 }
2235 catch(std::exception& e) {
2236 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
2237 return 0;
2238 }
2239
2240 if(t_pdl) {
2241 if(t_pdl->ipfilter(source, destination, *dh)) {
2242 if(!g_quiet)
2243 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2244 g_stats.policyDrops++;
2245 return 0;
2246 }
2247 }
2248
2249 if(MT->numProcesses() > g_maxMThreads) {
2250 if(!g_quiet)
2251 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
2252
2253 g_stats.overCapacityDrops++;
2254 return 0;
2255 }
2256
2257 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
2258 dc->setSocket(fd);
2259 dc->d_tag=ctag;
2260 dc->d_qhash=qhash;
2261 dc->setRemote(fromaddr);
2262 dc->setSource(source);
2263 dc->setLocal(destaddr);
2264 dc->setDestination(destination);
2265 dc->d_tcp=false;
2266 dc->d_ecsFound = ecsFound;
2267 dc->d_ecsParsed = ecsParsed;
2268 dc->d_ecsBegin = ecsBegin;
2269 dc->d_ecsEnd = ecsEnd;
2270 dc->d_ednssubnet = ednssubnet;
2271 dc->d_ttlCap = ttlCap;
2272 dc->d_variable = variable;
2273 #ifdef HAVE_PROTOBUF
2274 if (t_protobufServers || t_outgoingProtobufServers) {
2275 dc->d_uuid = std::move(uniqueId);
2276 }
2277 dc->d_requestorId = requestorId;
2278 dc->d_deviceId = deviceId;
2279 #endif
2280
2281 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
2282 return 0;
2283 }
2284
2285
2286 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
2287 {
2288 ssize_t len;
2289 static const size_t maxIncomingQuerySize = 512;
2290 static thread_local std::string data;
2291 ComboAddress fromaddr;
2292 struct msghdr msgh;
2293 struct iovec iov;
2294 char cbuf[256];
2295 bool firstQuery = true;
2296
2297 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2298 data.resize(maxIncomingQuerySize);
2299 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2300 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
2301
2302 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
2303
2304 firstQuery = false;
2305
2306 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2307 g_stats.ignoredCount++;
2308 if (!g_quiet) {
2309 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2310 }
2311 return;
2312 }
2313
2314 if (msgh.msg_flags & MSG_TRUNC) {
2315 g_stats.truncatedDrops++;
2316 if (!g_quiet) {
2317 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2318 }
2319 return;
2320 }
2321
2322 if(t_remotes) {
2323 t_remotes->push_back(fromaddr);
2324 }
2325
2326 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2327 if(!g_quiet) {
2328 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2329 }
2330
2331 g_stats.unauthorizedUDP++;
2332 return;
2333 }
2334 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2335 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2336 if(!g_quiet) {
2337 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2338 }
2339
2340 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2341 return;
2342 }
2343
2344 try {
2345 data.resize(static_cast<size_t>(len));
2346 dnsheader* dh=(dnsheader*)&data[0];
2347
2348 if(dh->qr) {
2349 g_stats.ignoredCount++;
2350 if(g_logCommonErrors) {
2351 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2352 }
2353 }
2354 else if(dh->opcode) {
2355 g_stats.ignoredCount++;
2356 if(g_logCommonErrors) {
2357 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2358 }
2359 }
2360 else if (dh->qdcount == 0) {
2361 g_stats.emptyQueriesCount++;
2362 if(g_logCommonErrors) {
2363 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2364 }
2365 }
2366 else {
2367 struct timeval tv={0,0};
2368 HarvestTimestamp(&msgh, &tv);
2369 ComboAddress dest;
2370 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2371 auto loc = rplookup(g_listenSocketsAddresses, fd);
2372 if(HarvestDestinationAddress(&msgh, &dest)) {
2373 // but.. need to get port too
2374 if(loc) {
2375 dest.sin4.sin_port = loc->sin4.sin_port;
2376 }
2377 }
2378 else {
2379 if(loc) {
2380 dest = *loc;
2381 }
2382 else {
2383 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2384 socklen_t slen = dest.getSocklen();
2385 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2386 }
2387 }
2388
2389 if(g_weDistributeQueries) {
2390 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2391 }
2392 else {
2393 ++s_threadInfos[t_id].numberOfDistributedQueries;
2394 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
2395 }
2396 }
2397 }
2398 catch(const MOADNSException &mde) {
2399 g_stats.clientParseError++;
2400 if(g_logCommonErrors) {
2401 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2402 }
2403 }
2404 catch(const std::runtime_error& e) {
2405 g_stats.clientParseError++;
2406 if(g_logCommonErrors) {
2407 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2408 }
2409 }
2410 }
2411 else {
2412 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2413 if(firstQuery && errno == EAGAIN) {
2414 g_stats.noPacketError++;
2415 }
2416
2417 break;
2418 }
2419 }
2420 }
2421
2422 static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
2423 {
2424 int fd;
2425 vector<string>locals;
2426 stringtok(locals,::arg()["local-address"]," ,");
2427
2428 if(locals.empty())
2429 throw PDNSException("No local address specified");
2430
2431 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2432 ServiceTuple st;
2433 st.port=::arg().asNum("local-port");
2434 parseService(*i, st);
2435
2436 ComboAddress sin;
2437
2438 sin.reset();
2439 sin.sin4.sin_family = AF_INET;
2440 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2441 sin.sin6.sin6_family = AF_INET6;
2442 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2443 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
2444 }
2445
2446 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
2447 if(fd<0)
2448 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2449
2450 setCloseOnExec(fd);
2451
2452 int tmp=1;
2453 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
2454 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
2455 exit(1);
2456 }
2457 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
2458 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2459 }
2460
2461 #ifdef TCP_DEFER_ACCEPT
2462 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
2463 if(i==locals.begin())
2464 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
2465 }
2466 #endif
2467
2468 if( ::arg().mustDo("non-local-bind") )
2469 Utility::setBindAny(AF_INET, fd);
2470
2471 #ifdef SO_REUSEPORT
2472 if(g_reusePort) {
2473 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2474 throw PDNSException("SO_REUSEPORT: "+stringerror());
2475 }
2476 #endif
2477
2478 if (::arg().asNum("tcp-fast-open") > 0) {
2479 #ifdef TCP_FASTOPEN
2480 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2481 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
2482 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
2483 }
2484 #else
2485 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
2486 #endif
2487 }
2488
2489 sin.sin4.sin_port = htons(st.port);
2490 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
2491 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
2492 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
2493
2494 setNonBlocking(fd);
2495 setSocketSendBuffer(fd, 65000);
2496 listen(fd, 128);
2497 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
2498 tcpSockets.insert(fd);
2499
2500 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2501 // - fd is not that which we know here, but returned from accept()
2502 if(sin.sin4.sin_family == AF_INET)
2503 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
2504 else
2505 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2506 }
2507 }
2508
2509 static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
2510 {
2511 int one=1;
2512 vector<string>locals;
2513 stringtok(locals,::arg()["local-address"]," ,");
2514
2515 if(locals.empty())
2516 throw PDNSException("No local address specified");
2517
2518 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2519 ServiceTuple st;
2520 st.port=::arg().asNum("local-port");
2521 parseService(*i, st);
2522
2523 ComboAddress sin;
2524
2525 sin.reset();
2526 sin.sin4.sin_family = AF_INET;
2527 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2528 sin.sin6.sin6_family = AF_INET6;
2529 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2530 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2531 }
2532
2533 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2534 if(fd < 0) {
2535 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2536 }
2537 if (!setSocketTimestamps(fd))
2538 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2539
2540 if(IsAnyAddress(sin)) {
2541 if(sin.sin4.sin_family == AF_INET)
2542 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2543 g_fromtosockets.insert(fd);
2544 #ifdef IPV6_RECVPKTINFO
2545 if(sin.sin4.sin_family == AF_INET6)
2546 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2547 g_fromtosockets.insert(fd);
2548 #endif
2549 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2550 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2551 }
2552 }
2553 if( ::arg().mustDo("non-local-bind") )
2554 Utility::setBindAny(AF_INET6, fd);
2555
2556 setCloseOnExec(fd);
2557
2558 setSocketReceiveBuffer(fd, 250000);
2559 sin.sin4.sin_port = htons(st.port);
2560
2561
2562 #ifdef SO_REUSEPORT
2563 if(g_reusePort) {
2564 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2565 throw PDNSException("SO_REUSEPORT: "+stringerror());
2566 }
2567 #endif
2568 socklen_t socklen=sin.getSocklen();
2569 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2570 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2571
2572 setNonBlocking(fd);
2573
2574 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
2575 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2576 if(sin.sin4.sin_family == AF_INET)
2577 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2578 else
2579 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2580 }
2581 }
2582
2583 static void daemonize(void)
2584 {
2585 if(fork())
2586 exit(0); // bye bye
2587
2588 setsid();
2589
2590 int i=open("/dev/null",O_RDWR); /* open stdin */
2591 if(i < 0)
2592 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2593 else {
2594 dup2(i,0); /* stdin */
2595 dup2(i,1); /* stderr */
2596 dup2(i,2); /* stderr */
2597 close(i);
2598 }
2599 }
2600
2601 static void usr1Handler(int)
2602 {
2603 statsWanted=true;
2604 }
2605
2606 static void usr2Handler(int)
2607 {
2608 g_quiet= !g_quiet;
2609 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2610 ::arg().set("quiet")=g_quiet ? "" : "no";
2611 }
2612
2613 static void doStats(void)
2614 {
2615 static time_t lastOutputTime;
2616 static uint64_t lastQueryCount;
2617
2618 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2619 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2620
2621 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2622 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2623 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2624 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2625 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2626
2627 g_log<<Logger::Notice<<"stats: throttle map: "
2628 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2629 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2630 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2631 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2632 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2633 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2634 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2635
2636 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2637 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2638
2639 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2640 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2641
2642 size_t idx = 0;
2643 for (const auto& threadInfo : s_threadInfos) {
2644 if(threadInfo.isWorker) {
2645 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
2646 ++idx;
2647 }
2648 }
2649
2650 time_t now = time(0);
2651 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2652 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2653 }
2654 lastOutputTime = now;
2655 lastQueryCount = SyncRes::s_queries;
2656 }
2657 else if(statsWanted)
2658 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
2659
2660 statsWanted=false;
2661 }
2662
2663 static void houseKeeping(void *)
2664 {
2665 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
2666 static thread_local int cleanCounter=0;
2667 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2668 auto luaconfsLocal = g_luaconfs.getLocal();
2669
2670 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2671 // Loading the Lua config file already "refreshed" the TAs
2672 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2673 }
2674
2675 try {
2676 if(s_running) {
2677 return;
2678 }
2679 s_running=true;
2680
2681 struct timeval now;
2682 Utility::gettimeofday(&now, 0);
2683
2684 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2685 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2686 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2687
2688 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2689
2690 if(!((cleanCounter++)%40)) { // this is a full scan!
2691 time_t limit=now.tv_sec-300;
2692 SyncRes::pruneNSSpeeds(limit);
2693 }
2694 last_prune=time(0);
2695 }
2696
2697 if(now.tv_sec - last_rootupdate > 7200) {
2698 int res = SyncRes::getRootNS(g_now, nullptr);
2699 if (!res)
2700 last_rootupdate=now.tv_sec;
2701 }
2702
2703 if(isHandlerThread()) {
2704
2705 if(now.tv_sec - last_secpoll >= 3600) {
2706 try {
2707 doSecPoll(&last_secpoll);
2708 }
2709 catch(std::exception& e)
2710 {
2711 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2712 }
2713 catch(PDNSException& e)
2714 {
2715 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2716 }
2717 catch(ImmediateServFailException &e)
2718 {
2719 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2720 }
2721 catch(...)
2722 {
2723 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
2724 }
2725 }
2726
2727 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2728 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2729 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2730 try {
2731 map<DNSName, dsmap_t> dsAnchors;
2732 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2733 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2734 lci.dsAnchors = dsAnchors;
2735 });
2736 }
2737 last_trustAnchorUpdate = now.tv_sec;
2738 } catch (const PDNSException &pe) {
2739 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2740 }
2741 }
2742 }
2743 s_running=false;
2744 }
2745 catch(PDNSException& ae)
2746 {
2747 s_running=false;
2748 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2749 throw;
2750 }
2751 }
2752
2753 static void makeThreadPipes()
2754 {
2755 /* thread 0 is the handler / SNMP, we start at 1 */
2756 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2757 auto& threadInfos = s_threadInfos.at(n);
2758
2759 int fd[2];
2760 if(pipe(fd) < 0)
2761 unixDie("Creating pipe for inter-thread communications");
2762
2763 threadInfos.pipes.readToThread = fd[0];
2764 threadInfos.pipes.writeToThread = fd[1];
2765
2766 if(pipe(fd) < 0)
2767 unixDie("Creating pipe for inter-thread communications");
2768
2769 threadInfos.pipes.readFromThread = fd[0];
2770 threadInfos.pipes.writeFromThread = fd[1];
2771
2772 if(pipe(fd) < 0)
2773 unixDie("Creating pipe for inter-thread communications");
2774
2775 threadInfos.pipes.readQueriesToThread = fd[0];
2776 threadInfos.pipes.writeQueriesToThread = fd[1];
2777
2778 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
2779 unixDie("Making pipe for inter-thread communications non-blocking");
2780 }
2781 }
2782 }
2783
2784 struct ThreadMSG
2785 {
2786 pipefunc_t func;
2787 bool wantAnswer;
2788 };
2789
2790 void broadcastFunction(const pipefunc_t& func)
2791 {
2792 /* This function might be called by the worker with t_id 0 during startup
2793 for the initialization of ACLs and domain maps. After that it should only
2794 be called by the handler. */
2795
2796 if (s_threadInfos.empty() && isHandlerThread()) {
2797 /* the handler and distributors will call themselves below, but
2798 during startup we get called while s_threadInfos has not been
2799 populated yet to update the ACL or domain maps, so we need to
2800 handle that case.
2801 */
2802 func();
2803 }
2804
2805 unsigned int n = 0;
2806 for (const auto& threadInfo : s_threadInfos) {
2807 if(n++ == t_id) {
2808 func(); // don't write to ourselves!
2809 continue;
2810 }
2811
2812 ThreadMSG* tmsg = new ThreadMSG();
2813 tmsg->func = func;
2814 tmsg->wantAnswer = true;
2815 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2816 delete tmsg;
2817
2818 unixDie("write to thread pipe returned wrong size or error");
2819 }
2820
2821 string* resp = nullptr;
2822 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2823 unixDie("read from thread pipe returned wrong size or error");
2824
2825 if(resp) {
2826 delete resp;
2827 resp = nullptr;
2828 }
2829 }
2830 }
2831
2832 static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
2833 {
2834 auto& targetInfo = s_threadInfos[target];
2835 if(!targetInfo.isWorker) {
2836 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
2837 exit(1);
2838 }
2839
2840 const auto& tps = targetInfo.pipes;
2841
2842 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2843 if (written > 0) {
2844 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2845 delete tmsg;
2846 unixDie("write to thread pipe returned wrong size or error");
2847 }
2848 }
2849 else {
2850 int error = errno;
2851 if (error == EAGAIN || error == EWOULDBLOCK) {
2852 return false;
2853 } else {
2854 delete tmsg;
2855 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
2856 }
2857 }
2858
2859 ++targetInfo.numberOfDistributedQueries;
2860
2861 return true;
2862 }
2863
2864 static unsigned int getWorkerLoad(size_t workerIdx)
2865 {
2866 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
2867 if (mt != nullptr) {
2868 return mt->numProcesses();
2869 }
2870 return 0;
2871 }
2872
2873 static unsigned int selectWorker(unsigned int hash)
2874 {
2875 if (s_balancingFactor == 0) {
2876 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
2877 }
2878
2879 /* we start with one, representing the query we are currently handling */
2880 double currentLoad = 1;
2881 std::vector<unsigned int> load(g_numWorkerThreads);
2882 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
2883 load[idx] = getWorkerLoad(idx);
2884 currentLoad += load[idx];
2885 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
2886 }
2887
2888 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
2889 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
2890
2891 unsigned int worker = hash % g_numWorkerThreads;
2892 /* at least one server has to be at or below the average load */
2893 if (load[worker] > targetLoad) {
2894 ++g_stats.rebalancedQueries;
2895 do {
2896 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
2897 worker = (worker + 1) % g_numWorkerThreads;
2898 }
2899 while(load[worker] > targetLoad);
2900 }
2901
2902 return /* skip handler */ 1 + g_numDistributorThreads + worker;
2903 }
2904
2905 // This function is only called by the distributor threads, when pdns-distributes-queries is set
2906 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2907 {
2908 if (!isDistributorThread()) {
2909 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2910 exit(1);
2911 }
2912
2913 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2914 unsigned int target = selectWorker(hash);
2915
2916 ThreadMSG* tmsg = new ThreadMSG();
2917 tmsg->func = func;
2918 tmsg->wantAnswer = false;
2919
2920 if (!trySendingQueryToWorker(target, tmsg)) {
2921 /* if this function failed but did not raise an exception, it means that the pipe
2922 was full, let's try another one */
2923 unsigned int newTarget = 0;
2924 do {
2925 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
2926 } while (newTarget == target);
2927
2928 if (!trySendingQueryToWorker(newTarget, tmsg)) {
2929 g_stats.queryPipeFullDrops++;
2930 delete tmsg;
2931 }
2932 }
2933 }
2934
2935 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2936 {
2937 ThreadMSG* tmsg = nullptr;
2938
2939 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
2940 unixDie("read from thread pipe returned wrong size or error");
2941 }
2942
2943 void *resp=0;
2944 try {
2945 resp = tmsg->func();
2946 }
2947 catch(std::exception& e) {
2948 if(g_logCommonErrors)
2949 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2950 }
2951 catch(PDNSException& e) {
2952 if(g_logCommonErrors)
2953 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2954 }
2955 if(tmsg->wantAnswer) {
2956 const auto& threadInfo = s_threadInfos.at(t_id);
2957 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2958 delete tmsg;
2959 unixDie("write to thread pipe returned wrong size or error");
2960 }
2961 }
2962
2963 delete tmsg;
2964 }
2965
2966 template<class T> void *voider(const boost::function<T*()>& func)
2967 {
2968 return func();
2969 }
2970
2971 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2972 {
2973 a.insert(a.end(), b.begin(), b.end());
2974 return a;
2975 }
2976
/* Appends all (name, type) pairs of 'b' to 'a' and returns 'a'. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2982
2983 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2984 {
2985 a.insert(a.end(), b.begin(), b.end());
2986 return a;
2987 }
2988
2989
2990 /*
2991 This function should only be called by the handler to gather metrics, wipe the cache,
2992 reload the Lua script (not the Lua config) or change the current trace regex,
2993 and by the SNMP thread to gather metrics. */
2994 template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
2995 {
2996 if (!isHandlerThread()) {
2997 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
2998 exit(1);
2999 }
3000
3001 unsigned int n = 0;
3002 T ret=T();
3003 for (const auto& threadInfo : s_threadInfos) {
3004 if (n++ == t_id) {
3005 continue;
3006 }
3007
3008 const auto& tps = threadInfo.pipes;
3009 ThreadMSG* tmsg = new ThreadMSG();
3010 tmsg->func = boost::bind(voider<T>, func);
3011 tmsg->wantAnswer = true;
3012
3013 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3014 delete tmsg;
3015 unixDie("write to thread pipe returned wrong size or error");
3016 }
3017
3018 T* resp = nullptr;
3019 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3020 unixDie("read from thread pipe returned wrong size or error");
3021
3022 if(resp) {
3023 ret += *resp;
3024 delete resp;
3025 resp = nullptr;
3026 }
3027 }
3028 return ret;
3029 }
3030
3031 template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3032 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3033 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3034 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3035 template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3036
3037 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
3038 {
3039 try {
3040 string remote;
3041 string msg=s_rcc.recv(&remote);
3042 RecursorControlParser rcp;
3043 RecursorControlParser::func_t* command;
3044
3045 string answer=rcp.getAnswer(msg, &command);
3046
3047 // If we are inside a chroot, we need to strip
3048 if (!arg()["chroot"].empty()) {
3049 size_t len = arg()["chroot"].length();
3050 remote = remote.substr(len);
3051 }
3052
3053 s_rcc.send(answer, &remote);
3054 command();
3055 }
3056 catch(const std::exception& e) {
3057 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
3058 }
3059 catch(const PDNSException& ae) {
3060 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
3061 }
3062 }
3063
3064 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
3065 {
3066 PacketID* pident=any_cast<PacketID>(&var);
3067 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3068
3069 shared_array<char> buffer(new char[pident->inNeeded]);
3070
3071 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
3072 if(ret > 0) {
3073 pident->inMSG.append(&buffer[0], &buffer[ret]);
3074 pident->inNeeded-=(size_t)ret;
3075 if(!pident->inNeeded || pident->inIncompleteOkay) {
3076 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3077 PacketID pid=*pident;
3078 string msg=pident->inMSG;
3079
3080 t_fdm->removeReadFD(fd);
3081 MT->sendEvent(pid, &msg);
3082 }
3083 else {
3084 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3085 }
3086 }
3087 else {
3088 PacketID tmp=*pident;
3089 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
3090 string empty;
3091 MT->sendEvent(tmp, &empty); // this conveys error status
3092 }
3093 }
3094
3095 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
3096 {
3097 PacketID* pid=any_cast<PacketID>(&var);
3098 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
3099 if(ret > 0) {
3100 pid->outPos+=(ssize_t)ret;
3101 if(pid->outPos==pid->outMSG.size()) {
3102 PacketID tmp=*pid;
3103 t_fdm->removeWriteFD(fd);
3104 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3105 }
3106 }
3107 else { // error or EOF
3108 PacketID tmp(*pid);
3109 t_fdm->removeWriteFD(fd);
3110 string sent;
3111 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
3112 }
3113 }
3114
3115 // resend event to everybody chained onto it
3116 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
3117 {
3118 if(iter->key.chain.empty())
3119 return;
3120 // cerr<<"doResends called!\n";
3121 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3122 resend.fd=-1;
3123 resend.id=*i;
3124 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3125
3126 MT->sendEvent(resend, &content);
3127 g_stats.chainResends++;
3128 }
3129 }
3130
3131 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
3132 {
3133 PacketID pid=any_cast<PacketID>(var);
3134 ssize_t len;
3135 std::string packet;
3136 packet.resize(g_outgoingEDNSBufsize);
3137 ComboAddress fromaddr;
3138 socklen_t addrlen=sizeof(fromaddr);
3139
3140 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
3141
3142 if(len < (ssize_t) sizeof(dnsheader)) {
3143 if(len < 0)
3144 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
3145 else {
3146 g_stats.serverParseError++;
3147 if(g_logCommonErrors)
3148 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
3149 ": packet smaller than DNS header"<<endl;
3150 }
3151
3152 t_udpclientsocks->returnSocket(fd);
3153 string empty;
3154
3155 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3156 if(iter != MT->d_waiters.end())
3157 doResends(iter, pid, empty);
3158
3159 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
3160 return;
3161 }
3162
3163 packet.resize(len);
3164 dnsheader dh;
3165 memcpy(&dh, &packet.at(0), sizeof(dh));
3166
3167 PacketID pident;
3168 pident.remote=fromaddr;
3169 pident.id=dh.id;
3170 pident.fd=fd;
3171
3172 if(!dh.qr && g_logCommonErrors) {
3173 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
3174 }
3175
3176 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3177 !dh.qr) { // one weird server
3178 pident.domain.clear();
3179 pident.type = 0;
3180 }
3181 else {
3182 try {
3183 if(len > 12)
3184 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
3185 }
3186 catch(std::exception& e) {
3187 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
3188 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
3189 return;
3190 }
3191 }
3192
3193 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3194 if(iter != MT->d_waiters.end()) {
3195 doResends(iter, pident, packet);
3196 }
3197
3198 retryWithName:
3199
3200 if(!MT->sendEvent(pident, &packet)) {
3201 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3202 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3203 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
3204 pident.domain == mthread->key.domain) {
3205 mthread->key.nearMisses++;
3206 }
3207
3208 // be a bit paranoid here since we're weakening our matching
3209 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
3210 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3211 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3212 pident.domain = mthread->key.domain;
3213 pident.type = mthread->key.type;
3214 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
3215 }
3216 }
3217 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3218 if(g_logCommonErrors) {
3219 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
3220 }
3221 }
3222 else if(fd >= 0) {
3223 t_udpclientsocks->returnSocket(fd);
3224 }
3225 }
3226
3227 FDMultiplexer* getMultiplexer()
3228 {
3229 FDMultiplexer* ret;
3230 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
3231 try {
3232 ret=i.second();
3233 return ret;
3234 }
3235 catch(FDMultiplexerException &fe) {
3236 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
3237 }
3238 catch(...) {
3239 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
3240 }
3241 }
3242 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
3243 exit(1);
3244 }
3245
3246
3247 static string* doReloadLuaScript()
3248 {
3249 string fname= ::arg()["lua-dns-script"];
3250 try {
3251 if(fname.empty()) {
3252 t_pdl.reset();
3253 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
3254 return new string("unloaded\n");
3255 }
3256 else {
3257 t_pdl = std::make_shared<RecursorLua4>();
3258 t_pdl->loadFile(fname);
3259 }
3260 }
3261 catch(std::exception& e) {
3262 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
3263 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
3264 }
3265
3266 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
3267 return new string("(re)loaded '"+fname+"'\n");
3268 }
3269
3270 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3271 {
3272 if(begin != end)
3273 ::arg().set("lua-dns-script") = *begin;
3274
3275 return broadcastAccFunction<string>(doReloadLuaScript);
3276 }
3277
3278 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
3279 try
3280 {
3281 if(newRegex.empty()) {
3282 t_traceRegex.reset();
3283 return new string("unset\n");
3284 }
3285 else {
3286 t_traceRegex = std::make_shared<Regex>(newRegex);
3287 return new string("ok\n");
3288 }
3289 }
3290 catch(PDNSException& ae)
3291 {
3292 return new string(ae.reason+"\n");
3293 }
3294
3295 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3296 {
3297 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3298 }
3299
3300 static void checkLinuxIPv6Limits()
3301 {
3302 #ifdef __linux__
3303 string line;
3304 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
3305 int lim=std::stoi(line);
3306 if(lim < 16384) {
3307 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
3308 }
3309 }
3310 #endif
3311 }
3312 static void checkOrFixFDS()
3313 {
3314 unsigned int availFDs=getFilenumLimit();
3315 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3316
3317 if(wantFDs > availFDs) {
3318 unsigned int hardlimit= getFilenumLimit(true);
3319 if(hardlimit >= wantFDs) {
3320 setFilenumLimit(wantFDs);
3321 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
3322 }
3323 else {
3324 int newval = (hardlimit - 25) / g_numWorkerThreads;
3325 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
3326 g_maxMThreads = newval;
3327 setFilenumLimit(hardlimit);
3328 }
3329 }
3330 }
3331
3332 static void* recursorThread(unsigned int tid, const string& threadName);
3333
3334 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
3335 {
3336 t_allowFrom = ng;
3337 return nullptr;
3338 }
3339
3340 int g_argc;
3341 char** g_argv;
3342
3343 void parseACLs()
3344 {
3345 static bool l_initialized;
3346
3347 if(l_initialized) { // only reload configuration file on second call
3348 string configname=::arg()["config-dir"]+"/recursor.conf";
3349 if(::arg()["config-name"]!="") {
3350 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3351 }
3352 cleanSlashes(configname);
3353
3354 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
3355 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
3356 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
3357 ::arg().preParseFile(configname.c_str(), "include-dir");
3358 ::arg().preParse(g_argc, g_argv, "include-dir");
3359
3360 // then process includes
3361 std::vector<std::string> extraConfigs;
3362 ::arg().gatherIncludes(extraConfigs);
3363
3364 for(const std::string& fn : extraConfigs) {
3365 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3366 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3367 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3368 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3369 }
3370
3371 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3372 ::arg().preParse(g_argc, g_argv, "allow-from");
3373 }
3374
3375 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3376 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3377
3378 if(!::arg()["allow-from-file"].empty()) {
3379 string line;
3380 ifstream ifs(::arg()["allow-from-file"].c_str());
3381 if(!ifs) {
3382 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3383 }
3384
3385 string::size_type pos;
3386 while(getline(ifs,line)) {
3387 pos=line.find('#');
3388 if(pos!=string::npos)
3389 line.resize(pos);
3390 trim(line);
3391 if(line.empty())
3392 continue;
3393
3394 allowFrom->addMask(line);
3395 }
3396 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
3397 }
3398 else if(!::arg()["allow-from"].empty()) {
3399 vector<string> ips;
3400 stringtok(ips, ::arg()["allow-from"], ", ");
3401
3402 g_log<<Logger::Warning<<"Only allowing queries from: ";
3403 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3404 allowFrom->addMask(*i);
3405 if(i!=ips.begin())
3406 g_log<<Logger::Warning<<", ";
3407 g_log<<Logger::Warning<<*i;
3408 }
3409 g_log<<Logger::Warning<<endl;
3410 }
3411 else {
3412 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3413 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
3414 allowFrom = nullptr;
3415 }
3416
3417 g_initialAllowFrom = allowFrom;
3418 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
3419 oldAllowFrom = nullptr;
3420
3421 l_initialized = true;
3422 }
3423
3424
3425 static void setupDelegationOnly()
3426 {
3427 vector<string> parts;
3428 stringtok(parts, ::arg()["delegation-only"], ", \t");
3429 for(const auto& p : parts) {
3430 SyncRes::addDelegationOnly(DNSName(p));
3431 }
3432 }
3433
3434 static std::map<unsigned int, std::set<int> > parseCPUMap()
3435 {
3436 std::map<unsigned int, std::set<int> > result;
3437
3438 const std::string value = ::arg()["cpu-map"];
3439
3440 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
3441 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
3442 return result;
3443 }
3444
3445 std::vector<std::string> parts;
3446
3447 stringtok(parts, value, " \t");
3448
3449 for(const auto& part : parts) {
3450 if (part.find('=') == string::npos)
3451 continue;
3452
3453 try {
3454 auto headers = splitField(part, '=');
3455 trim(headers.first);
3456 trim(headers.second);
3457
3458 unsigned int threadId = pdns_stou(headers.first);
3459 std::vector<std::string> cpus;
3460
3461 stringtok(cpus, headers.second, ",");
3462
3463 for(const auto& cpu : cpus) {
3464 int cpuId = std::stoi(cpu);
3465
3466 result[threadId].insert(cpuId);
3467 }
3468 }
3469 catch(const std::exception& e) {
3470 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
3471 }
3472 }
3473
3474 return result;
3475 }
3476
3477 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3478 {
3479 const auto& cpuMapping = cpusMap.find(n);
3480 if (cpuMapping != cpusMap.cend()) {
3481 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3482 if (rc == 0) {
3483 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
3484 for (const auto cpu : cpuMapping->second) {
3485 g_log<<Logger::Info<<" "<<cpu;
3486 }
3487 g_log<<Logger::Info<<endl;
3488 }
3489 else {
3490 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
3491 for (const auto cpu : cpuMapping->second) {
3492 g_log<<Logger::Info<<" "<<cpu;
3493 }
3494 g_log<<Logger::Info<<strerror(rc)<<endl;
3495 }
3496 }
3497 }
3498
3499 #ifdef NOD_ENABLED
3500 static void setupNODThread()
3501 {
3502 if (g_nodEnabled) {
3503 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3504 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
3505 try {
3506 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3507 }
3508 catch (const PDNSException& e) {
3509 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3510 _exit(1);
3511 }
3512 if (!t_nodDBp->init()) {
3513 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3514 _exit(1);
3515 }
3516 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
3517 t.detach();
3518 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
3519 }
3520 if (g_udrEnabled) {
3521 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3522 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
3523 try {
3524 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3525 }
3526 catch (const PDNSException& e) {
3527 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3528 _exit(1);
3529 }
3530 if (!t_udrDBp->init()) {
3531 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3532 _exit(1);
3533 }
3534 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
3535 t.detach();
3536 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
3537 }
3538 }
3539
3540 void parseNODWhitelist(const std::string& wlist)
3541 {
3542 vector<string> parts;
3543 stringtok(parts, wlist, ",; ");
3544 for(const auto& a : parts) {
3545 g_nodDomainWL.add(DNSName(a));
3546 }
3547 }
3548
3549 static void setupNODGlobal()
3550 {
3551 // Setup NOD subsystem
3552 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3553 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3554 g_nodLog = ::arg().mustDo("new-domain-log");
3555 parseNODWhitelist(::arg()["new-domain-whitelist"]);
3556
3557 // Setup Unique DNS Response subsystem
3558 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3559 g_udrLog = ::arg().mustDo("unique-response-log");
3560 }
3561 #endif /* NOD_ENABLED */
3562
3563 static int serviceMain(int argc, char*argv[])
3564 {
3565 g_log.setName(s_programname);
3566 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3567 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
3568
3569 if(!::arg()["logging-facility"].empty()) {
3570 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3571 if(val >= 0)
3572 g_log.setFacility(val);
3573 else
3574 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
3575 }
3576
3577 showProductVersion();
3578
3579 g_disthashseed=dns_random(0xffffffff);
3580
3581 checkLinuxIPv6Limits();
3582 try {
3583 vector<string> addrs;
3584 if(!::arg()["query-local-address6"].empty()) {
3585 SyncRes::s_doIPv6=true;
3586 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3587
3588 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3589 for(const string& addr : addrs) {
3590 g_localQueryAddresses6.push_back(ComboAddress(addr));
3591 }
3592 }
3593 else {
3594 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
3595 }
3596 addrs.clear();
3597 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3598 for(const string& addr : addrs) {
3599 g_localQueryAddresses4.push_back(ComboAddress(addr));
3600 }
3601 }
3602 catch(std::exception& e) {
3603 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
3604 exit(99);
3605 }
3606
3607 // keep this ABOVE loadRecursorLuaConfig!
3608 if(::arg()["dnssec"]=="off")
3609 g_dnssecmode=DNSSECMode::Off;
3610 else if(::arg()["dnssec"]=="process-no-validate")
3611 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3612 else if(::arg()["dnssec"]=="process")
3613 g_dnssecmode=DNSSECMode::Process;
3614 else if(::arg()["dnssec"]=="validate")
3615 g_dnssecmode=DNSSECMode::ValidateAll;
3616 else if(::arg()["dnssec"]=="log-fail")
3617 g_dnssecmode=DNSSECMode::ValidateForLog;
3618 else {
3619 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
3620 exit(1);
3621 }
3622
3623 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3624 if (g_signatureInceptionSkew < 0) {
3625 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3626 exit(1);
3627 }
3628
3629 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
3630 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
3631
3632 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3633 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
3634
3635 luaConfigDelayedThreads delayedLuaThreads;
3636 try {
3637 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
3638 }
3639 catch (PDNSException &e) {
3640 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
3641 exit(1);
3642 }
3643
3644 parseACLs();
3645 initPublicSuffixList(::arg()["public-suffix-list-file"]);
3646
3647 if(!::arg()["dont-query"].empty()) {
3648 vector<string> ips;
3649 stringtok(ips, ::arg()["dont-query"], ", ");
3650 ips.push_back("0.0.0.0");
3651 ips.push_back("::");
3652
3653 g_log<<Logger::Warning<<"Will not send queries to: ";
3654 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3655 SyncRes::addDontQuery(*i);
3656 if(i!=ips.begin())
3657 g_log<<Logger::Warning<<", ";
3658 g_log<<Logger::Warning<<*i;
3659 }
3660 g_log<<Logger::Warning<<endl;
3661 }
3662
3663 g_quiet=::arg().mustDo("quiet");
3664
3665 /* this needs to be done before parseACLs(), which call broadcastFunction() */
3666 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3667 if(g_weDistributeQueries) {
3668 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
3669 }
3670
3671 setupDelegationOnly();
3672 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
3673
3674 if(::arg()["trace"]=="fail") {
3675 SyncRes::setDefaultLogMode(SyncRes::Store);
3676 }
3677 else if(::arg().mustDo("trace")) {
3678 SyncRes::setDefaultLogMode(SyncRes::Log);
3679 ::arg().set("quiet")="no";
3680 g_quiet=false;
3681 g_dnssecLOG=true;
3682 }
3683 string myHostname = getHostname();
3684 if (myHostname == "UNKNOWN"){
3685 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3686 myHostname = "";
3687 }
3688
3689 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3690 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
3691
3692 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3693
3694 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
3695 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
3696 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
3697 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
3698 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3699 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3700 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
3701 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3702 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
3703 SyncRes::s_serverID=::arg()["server-id"];
3704 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3705 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3706 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3707 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3708 if(SyncRes::s_serverID.empty()) {
3709 SyncRes::s_serverID = myHostname;
3710 }
3711
3712 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3713 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3714 SyncRes::clearECSStats();
3715 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3716 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
3717 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
3718
3719 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3720 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3721 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3722 }
3723 else {
3724 bool found = false;
3725 for (const auto& addr : g_localQueryAddresses4) {
3726 if (!IsAnyAddress(addr)) {
3727 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3728 found = true;
3729 break;
3730 }
3731 }
3732 if (!found) {
3733 for (const auto& addr : g_localQueryAddresses6) {
3734 if (!IsAnyAddress(addr)) {
3735 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3736 found = true;
3737 break;
3738 }
3739 }
3740 if (!found) {
3741 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3742 }
3743 }
3744 }
3745
3746 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3747 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3748 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3749
3750 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
3751 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
3752
3753 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3754
3755 g_initialDomainMap = parseAuthAndForwards();
3756
3757 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3758
3759 g_logCommonErrors=::arg().mustDo("log-common-errors");
3760 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3761
3762 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3763 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3764
3765 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3766
3767 g_numDistributorThreads = ::arg().asNum("distributor-threads");
3768 g_numWorkerThreads = ::arg().asNum("threads");
3769 if (g_numWorkerThreads < 1) {
3770 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3771 g_numWorkerThreads = 1;
3772 }
3773
3774 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
3775 g_maxMThreads = ::arg().asNum("max-mthreads");
3776
3777 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3778
3779 g_statisticsInterval = ::arg().asNum("statistics-interval");
3780
3781 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
3782 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
3783 s_balancingFactor = 0.0;
3784 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
3785 }
3786
3787 #ifdef SO_REUSEPORT
3788 g_reusePort = ::arg().mustDo("reuseport");
3789 #endif
3790
3791 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
3792
3793 if (g_reusePort) {
3794 if (g_weDistributeQueries) {
3795 /* first thread is the handler, then distributors */
3796 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3797 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3798 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3799 makeUDPServerSockets(deferredAdds);
3800 makeTCPServerSockets(deferredAdds, tcpSockets);
3801 }
3802 }
3803 else {
3804 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3805 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3806 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3807 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3808 makeUDPServerSockets(deferredAdds);
3809 makeTCPServerSockets(deferredAdds, tcpSockets);
3810 }
3811 }
3812 }
3813 else {
3814 std::set<int> tcpSockets;
3815 /* we don't have reuseport so we can only open one socket per
3816 listening addr:port and everyone will listen on it */
3817 makeUDPServerSockets(g_deferredAdds);
3818 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3819
3820 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3821 needs to listen to the shared sockets */
3822 if (g_weDistributeQueries) {
3823 /* first thread is the handler, then distributors */
3824 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3825 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3826 }
3827 }
3828 else {
3829 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3830 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3831 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3832 }
3833 }
3834 }
3835
3836 #ifdef NOD_ENABLED
3837 // Setup newly observed domain globals
3838 setupNODGlobal();
3839 #endif /* NOD_ENABLED */
3840
3841 int forks;
3842 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3843 if(!fork()) // we are child
3844 break;
3845 }
3846
3847 if(::arg().mustDo("daemon")) {
3848 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3849 g_log.toConsole(Logger::Critical);
3850 daemonize();
3851 }
3852 signal(SIGUSR1,usr1Handler);
3853 signal(SIGUSR2,usr2Handler);
3854 signal(SIGPIPE,SIG_IGN);
3855
3856 checkOrFixFDS();
3857
3858 #ifdef HAVE_LIBSODIUM
3859 if (sodium_init() == -1) {
3860 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3861 exit(99);
3862 }
3863 #endif
3864
3865 openssl_thread_setup();
3866 openssl_seed();
3867 /* setup rng before chroot */
3868 dns_random_init();
3869
3870 if(::arg()["server-id"].empty()) {
3871 ::arg().set("server-id") = myHostname;
3872 }
3873
3874 int newgid=0;
3875 if(!::arg()["setgid"].empty())
3876 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3877 int newuid=0;
3878 if(!::arg()["setuid"].empty())
3879 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3880
3881 Utility::dropGroupPrivs(newuid, newgid);
3882
3883 if (!::arg()["chroot"].empty()) {
3884 #ifdef HAVE_SYSTEMD
3885 char *ns;
3886 ns = getenv("NOTIFY_SOCKET");
3887 if (ns != nullptr) {
3888 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3889 exit(1);
3890 }
3891 #endif
3892 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3893 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3894 exit(1);
3895 }
3896 else
3897 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3898 }
3899
3900 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3901 if(!s_pidfname.empty())
3902 unlink(s_pidfname.c_str()); // remove possible old pid file
3903 writePid();
3904
3905 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3906
3907 Utility::dropUserPrivs(newuid);
3908 try {
3909 /* we might still have capabilities remaining, for example if we have been started as root
3910 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
3911 like CAP_NET_BIND_SERVICE.
3912 */
3913 dropCapabilities();
3914 }
3915 catch(const std::exception& e) {
3916 g_log<<Logger::Warning<<e.what()<<endl;
3917 }
3918
3919 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3920
3921 makeThreadPipes();
3922
3923 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3924 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3925 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3926 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
3927
3928 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
3929 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
3930 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
3931 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
3932
3933 if (::arg().mustDo("snmp-agent")) {
3934 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3935 g_snmpAgent->run();
3936 }
3937
3938 int port = ::arg().asNum("udp-source-port-min");
3939 if(port < 1024 || port > 65535){
3940 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
3941 exit(99); // this isn't going to fix itself either
3942 }
3943 s_minUdpSourcePort = port;
3944 port = ::arg().asNum("udp-source-port-max");
3945 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
3946 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
3947 exit(99); // this isn't going to fix itself either
3948 }
3949 s_maxUdpSourcePort = port;
3950 std::vector<string> parts {};
3951 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
3952 for (const auto &part : parts)
3953 {
3954 port = std::stoi(part);
3955 if(port < 1024 || port > 65535){
3956 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
3957 exit(99); // this isn't going to fix itself either
3958 }
3959 s_avoidUdpSourcePorts.insert(port);
3960 }
3961
3962 unsigned int currentThreadId = 1;
3963 const auto cpusMap = parseCPUMap();
3964
3965 if(g_numThreads == 1) {
3966 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
3967 #ifdef HAVE_SYSTEMD
3968 sd_notify(0, "READY=1");
3969 #endif
3970
3971 /* This thread handles the web server, carbon, statistics and the control channel */
3972 auto& handlerInfos = s_threadInfos.at(0);
3973 handlerInfos.isHandler = true;
3974 handlerInfos.thread = std::thread(recursorThread, 0, "main");
3975
3976 setCPUMap(cpusMap, currentThreadId, pthread_self());
3977
3978 auto& infos = s_threadInfos.at(currentThreadId);
3979 infos.isListener = true;
3980 infos.isWorker = true;
3981 recursorThread(currentThreadId++, "worker");
3982 }
3983 else {
3984
3985 if (g_weDistributeQueries) {
3986 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
3987 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
3988 auto& infos = s_threadInfos.at(currentThreadId);
3989 infos.isListener = true;
3990 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
3991
3992 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3993 }
3994 }
3995
3996 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
3997
3998 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
3999 auto& infos = s_threadInfos.at(currentThreadId);
4000 infos.isListener = g_weDistributeQueries ? false : true;
4001 infos.isWorker = true;
4002 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
4003
4004 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4005 }
4006
4007 #ifdef HAVE_SYSTEMD
4008 sd_notify(0, "READY=1");
4009 #endif
4010
4011 /* This thread handles the web server, carbon, statistics and the control channel */
4012 auto& infos = s_threadInfos.at(0);
4013 infos.isHandler = true;
4014 infos.thread = std::thread(recursorThread, 0, "web+stat");
4015
4016 s_threadInfos.at(0).thread.join();
4017 }
4018 return 0;
4019 }
4020
4021 static void* recursorThread(unsigned int n, const string& threadName)
4022 try
4023 {
4024 t_id=n;
4025 auto& threadInfo = s_threadInfos.at(t_id);
4026
4027 static string threadPrefix = "pdns-r/";
4028 setThreadName(threadPrefix + threadName);
4029
4030 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
4031 SyncRes::setDomainMap(g_initialDomainMap);
4032 t_allowFrom = g_initialAllowFrom;
4033 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4034 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
4035 primeHints();
4036
4037 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
4038
4039 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
4040
4041 #ifdef NOD_ENABLED
4042 if (threadInfo.isWorker)
4043 setupNODThread();
4044 #endif /* NOD_ENABLED */
4045
4046 /* the listener threads handle TCP queries */
4047 if(threadInfo.isWorker || threadInfo.isListener) {
4048 try {
4049 if(!::arg()["lua-dns-script"].empty()) {
4050 t_pdl = std::make_shared<RecursorLua4>();
4051 t_pdl->loadFile(::arg()["lua-dns-script"]);
4052 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4053 }
4054 }
4055 catch(std::exception &e) {
4056 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4057 _exit(99);
4058 }
4059 }
4060
4061 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
4062 if(ringsize) {
4063 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4064 if(g_weDistributeQueries)
4065 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
4066 else
4067 t_remotes->set_capacity(ringsize);
4068 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4069 t_servfailremotes->set_capacity(ringsize);
4070 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4071 t_bogusremotes->set_capacity(ringsize);
4072 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4073 t_largeanswerremotes->set_capacity(ringsize);
4074 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4075 t_timeouts->set_capacity(ringsize);
4076
4077 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4078 t_queryring->set_capacity(ringsize);
4079 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4080 t_servfailqueryring->set_capacity(ringsize);
4081 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4082 t_bogusqueryring->set_capacity(ringsize);
4083 }
4084
4085 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
4086 threadInfo.mt = MT.get();
4087
4088 #ifdef HAVE_PROTOBUF
4089 /* start protobuf export threads if needed */
4090 auto luaconfsLocal = g_luaconfs.getLocal();
4091 checkProtobufExport(luaconfsLocal);
4092 checkOutgoingProtobufExport(luaconfsLocal);
4093 #endif /* HAVE_PROTOBUF */
4094
4095 PacketID pident;
4096
4097 t_fdm=getMultiplexer();
4098
4099 if(threadInfo.isHandler) {
4100 if(::arg().mustDo("webserver")) {
4101 g_log<<Logger::Warning << "Enabling web server" << endl;
4102 try {
4103 new RecursorWebServer(t_fdm);
4104 }
4105 catch(PDNSException &e) {
4106 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
4107 exit(99);
4108 }
4109 }
4110 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
4111 }
4112 else {
4113
4114 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4115 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4116
4117 if (threadInfo.isListener) {
4118 if (g_reusePort) {
4119 /* then every listener has its own FDs */
4120 for(const auto deferred : threadInfo.deferredAdds) {
4121 t_fdm->addReadFD(deferred.first, deferred.second);
4122 }
4123 }
4124 else {
4125 /* otherwise all listeners are listening on the same ones */
4126 for(const auto deferred : g_deferredAdds) {
4127 t_fdm->addReadFD(deferred.first, deferred.second);
4128 }
4129 }
4130 }
4131 }
4132
4133 registerAllStats();
4134
4135 if(threadInfo.isHandler) {
4136 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4137 }
4138
4139 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4140
4141 bool listenOnTCP(true);
4142
4143 time_t last_stat = 0;
4144 time_t last_carbon=0, last_lua_maintenance=0;
4145 time_t carbonInterval=::arg().asNum("carbon-interval");
4146 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
4147 counter.store(0); // used to periodically execute certain tasks
4148 for(;;) {
4149 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
4150
4151 if(!(counter%500)) {
4152 MT->makeThread(houseKeeping, 0);
4153 }
4154
4155 if(!(counter%55)) {
4156 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
4157 expired_t expired=t_fdm->getTimeouts(g_now);
4158
4159 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
4160 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4161 if(g_logCommonErrors)
4162 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4163 t_fdm->removeReadFD(i->first);
4164 }
4165 }
4166
4167 counter++;
4168
4169 if(threadInfo.isHandler) {
4170 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4171 doStats();
4172 last_stat = g_now.tv_sec;
4173 }
4174
4175 Utility::gettimeofday(&g_now, 0);
4176
4177 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4178 MT->makeThread(doCarbonDump, 0);
4179 last_carbon = g_now.tv_sec;
4180 }
4181 }
4182 if (t_pdl != nullptr) {
4183 // lua-dns-script directive is present, call the maintenance callback if needed
4184 /* remember that the listener threads handle TCP queries */
4185 if (threadInfo.isWorker || threadInfo.isListener) {
4186 // Only on threads processing queries
4187 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4188 t_pdl->maintenance();
4189 last_lua_maintenance = g_now.tv_sec;
4190 }
4191 }
4192 }
4193
4194 t_fdm->run(&g_now);
4195 // 'run' updates g_now for us
4196
4197 if(threadInfo.isListener) {
4198 if(listenOnTCP) {
4199 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4200 for(const auto fd : threadInfo.tcpSockets) {
4201 t_fdm->removeReadFD(fd);
4202 }
4203 listenOnTCP=false;
4204 }
4205 }
4206 else {
4207 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4208 for(const auto fd : threadInfo.tcpSockets) {
4209 t_fdm->addReadFD(fd, handleNewTCPQuestion);
4210 }
4211 listenOnTCP=true;
4212 }
4213 }
4214 }
4215 }
4216 }
4217 catch(PDNSException &ae) {
4218 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4219 return 0;
4220 }
4221 catch(std::exception &e) {
4222 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4223 return 0;
4224 }
4225 catch(...) {
4226 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4227 return 0;
4228 }
4229
4230
4231 int main(int argc, char **argv)
4232 {
4233 g_argc = argc;
4234 g_argv = argv;
4235 g_stats.startupTime=time(0);
4236 Utility::srandom();
4237 versionSetProduct(ProductRecursor);
4238 reportBasicTypes();
4239 reportOtherTypes();
4240
4241 int ret = EXIT_SUCCESS;
4242
4243 try {
4244 ::arg().set("stack-size","stack size per mthread")="200000";
4245 ::arg().set("soa-minimum-ttl","Don't change")="0";
4246 ::arg().set("no-shuffle","Don't change")="off";
4247 ::arg().set("local-port","port to listen on")="53";
4248 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4249 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4250 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4251 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4252 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4253 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4254 ::arg().set("daemon","Operate as a daemon")="no";
4255 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4256 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4257 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4258 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4259 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4260 ::arg().set("chroot","switch to chroot jail")="";
4261 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4262 ::arg().set("setuid","If set, change user id to this uid for more security")="";
4263 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4264 ::arg().set("threads", "Launch this number of threads")="2";
4265 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4266 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4267 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4268 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4269 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4270 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4271 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4272 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4273 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4274 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4275 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
4276 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4277 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4278 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4279 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4280 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4281
4282 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4283 ::arg().set("quiet","Suppress logging of questions and answers")="";
4284 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4285 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
4286 ::arg().set("socket-owner","Owner of socket")="";
4287 ::arg().set("socket-group","Group of socket")="";
4288 ::arg().set("socket-mode", "Permissions for socket")="";
4289
4290 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
4291 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4292 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4293 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4294 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4295 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4296 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4297 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4298 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4299 ::arg().set("hint-file", "If set, load root hints from this file")="";
4300 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4301 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4302 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4303 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4304 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4305 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4306 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4307 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4308 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4309 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4310 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
4311 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4312 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4313 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4314 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4315 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4316 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4317 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4318 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4319 ::arg().set("lua-config-file", "More powerful configuration options")="";
4320
4321 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4322 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4323 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4324 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4325 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
4326 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
4327 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4328 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
4329 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
4330 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
4331 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
4332 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4333 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
4334 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
4335 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
4336 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
4337 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
4338 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
4339 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
4340 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
4341 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
4342 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4343 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
4344 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
4345 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
4346 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
4347 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4348 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
4349 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
4350 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
4351 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
4352 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
4353 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
4354
4355 ::arg().set("include-dir","Include *.conf files from this directory")="";
4356 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
4357
4358 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
4359
4360 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
4361 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
4362
4363 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4364 for (size_t idx = 0; idx < 32; idx++) {
4365 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4366 }
4367 for (size_t idx = 0; idx < 128; idx++) {
4368 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4369 }
4370 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4371 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4372 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4373 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
4374
4375 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
4376 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
4377
4378 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4379
4380 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4381
4382 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
4383 ::arg().set("xpf-rr-code","XPF option code to use")="0";
4384
4385 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
4386 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4387 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
4388 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
4389 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
4390 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
4391 #ifdef NOD_ENABLED
4392 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4393 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4394 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4395 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4396 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4397 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
4398 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
4399 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4400 ::arg().set("unique-response-log", "Log unique responses")="yes";
4401 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
4402 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
4403 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
4404 #endif /* NOD_ENABLED */
4405 ::arg().setCmd("help","Provide a helpful message");
4406 ::arg().setCmd("version","Print version string");
4407 ::arg().setCmd("config","Output blank configuration");
4408 g_log.toConsole(Logger::Info);
4409 ::arg().laxParse(argc,argv); // do a lax parse
4410
4411 string configname=::arg()["config-dir"]+"/recursor.conf";
4412 if(::arg()["config-name"]!="") {
4413 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4414 s_programname+="-"+::arg()["config-name"];
4415 }
4416 cleanSlashes(configname);
4417
4418 if(!::arg().getCommands().empty()) {
4419 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4420 exit(99);
4421 }
4422
4423 if(::arg().mustDo("config")) {
4424 cout<<::arg().configstring()<<endl;
4425 exit(0);
4426 }
4427
4428 if(!::arg().file(configname.c_str()))
4429 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
4430
4431 ::arg().parse(argc,argv);
4432
4433 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4434 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
4435 exit(EXIT_FAILURE);
4436 }
4437
4438 if (::arg()["socket-dir"].empty()) {
4439 if (::arg()["chroot"].empty())
4440 ::arg().set("socket-dir") = LOCALSTATEDIR;
4441 else
4442 ::arg().set("socket-dir") = "/";
4443 }
4444
4445 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
4446
4447 if(::arg().asNum("threads")==1) {
4448 if (::arg().mustDo("pdns-distributes-queries")) {
4449 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4450 ::arg().set("pdns-distributes-queries")="no";
4451 }
4452 }
4453
4454 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4455 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4456 ::arg().set("distributor-threads")="1";
4457 }
4458
4459 if (!::arg().mustDo("pdns-distributes-queries")) {
4460 ::arg().set("distributor-threads")="0";
4461 }
4462
4463 if(::arg().mustDo("help")) {
4464 cout<<"syntax:"<<endl<<endl;
4465 cout<<::arg().helpstring(::arg()["help"])<<endl;
4466 exit(0);
4467 }
4468 if(::arg().mustDo("version")) {
4469 showProductVersion();
4470 showBuildConfiguration();
4471 exit(0);
4472 }
4473
4474 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
4475
4476 if (logUrgency < Logger::Error)
4477 logUrgency = Logger::Error;
4478 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4479 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4480 }
4481 g_log.setLoglevel(logUrgency);
4482 g_log.toConsole(logUrgency);
4483
4484 serviceMain(argc, argv);
4485 }
4486 catch(PDNSException &ae) {
4487 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4488 ret=EXIT_FAILURE;
4489 }
4490 catch(std::exception &e) {
4491 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4492 ret=EXIT_FAILURE;
4493 }
4494 catch(...) {
4495 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4496 ret=EXIT_FAILURE;
4497 }
4498
4499 return ret;
4500 }