]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
rec: Add a 'rebalanced-queries' metric
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
31 #endif
32 #include "ws-recursor.hh"
33 #include <thread>
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
36 #include "utility.hh"
37 #include "dns_random.hh"
38 #ifdef HAVE_LIBSODIUM
39 #include <sodium.h>
40 #endif
41 #include "opensslsigners.hh"
42 #include <iostream>
43 #include <errno.h>
44 #include <boost/static_assert.hpp>
45 #include <map>
46 #include <set>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
49 #include <stdio.h>
50 #include <signal.h>
51 #include <stdlib.h>
52 #include "misc.hh"
53 #include "mtasker.hh"
54 #include <utility>
55 #include "arguments.hh"
56 #include "syncres.hh"
57 #include <fcntl.h>
58 #include <fstream>
59 #include "sortlist.hh"
60 #include "sstuff.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
66 #ifdef MALLOC_TRACE
67 #include "malloctrace.hh"
68 #endif
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
76 #include "logger.hh"
77 #include "iputils.hh"
78 #include "mplexer.hh"
79 #include "config.h"
80 #include "lua-recursor4.hh"
81 #include "version.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
84 #include "dnsname.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
90 #include "gettime.hh"
91 #include "pubsuffix.hh"
92 #ifdef NOD_ENABLED
93 #include "nod.hh"
94 #endif /* NOD_ENABLED */
95
96 #include "rec-protobuf.hh"
97 #include "rec-snmp.hh"
98
99 #ifdef HAVE_SYSTEMD
100 #include <systemd/sd-daemon.h>
101 #endif
102
103 #include "namespaces.hh"
104
105 #ifdef HAVE_PROTOBUF
106 #include "uuid-utils.hh"
107 #endif
108
109 #include "xpf.hh"
110
// Counter map keyed on client address, ordered with ComboAddress::addressOnlyLessThan.
// TCPConnection's constructor/destructor increment/decrement the entry for the remote.
typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;

// Thread-local state that is private to this translation unit.
static thread_local std::shared_ptr<RecursorLua4> t_pdl;
static thread_local unsigned int t_id = 0; // value returned by getRecursorThreadId()
static thread_local std::shared_ptr<Regex> t_traceRegex;
static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
#ifdef HAVE_PROTOBUF
// Protobuf loggers for this thread and the Lua configuration generation they
// were built from; see checkProtobufExport() / checkOutgoingProtobufExport().
static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
static thread_local uint64_t t_protobufServersGeneration;
static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
static thread_local uint64_t t_outgoingProtobufServersGeneration;
#endif /* HAVE_PROTOBUF */

// Thread-local state with external linkage.
thread_local std::unique_ptr<MT_t> MT; // the big MTasker
thread_local std::unique_ptr<MemRecursorCache> t_RC;
thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
thread_local FDMultiplexer* t_fdm{nullptr}; // multiplexer on which the functions below register read/write FDs
// Ring buffers; updateResponseStats() pushes into the servfail/largeanswer ones when they are set.
thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
#ifdef NOD_ENABLED
// Databases consulted by nodCheckNewDomain()/nodAddDomain() and udrCheckUniqueDNSRecord().
thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
#endif /* NOD_ENABLED */
__thread struct timeval g_now; // timestamp, updated (too) frequently

// List of (int, callback) pairs; RecThreadInfo documents the ints as listening socket FDs.
typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
138
139 // for communicating with our threads
140 // effectively readonly after startup
141 struct RecThreadInfo
142 {
143 struct ThreadPipeSet
144 {
145 int writeToThread{-1};
146 int readToThread{-1};
147 int writeFromThread{-1};
148 int readFromThread{-1};
149 int writeQueriesToThread{-1}; // this one is non-blocking
150 int readQueriesToThread{-1};
151 };
152
153 /* FD corresponding to TCP sockets this thread is listening
154 on.
155 These FDs are also in deferredAdds when we have one
156 socket per listener, and in g_deferredAdds instead. */
157 std::set<int> tcpSockets;
158 /* FD corresponding to listening sockets if we have one socket per
159 listener (with reuseport), otherwise all listeners share the
160 same FD and g_deferredAdds is then used instead */
161 deferredAdd_t deferredAdds;
162 struct ThreadPipeSet pipes;
163 std::thread thread;
164 MT_t* mt{nullptr};
165 uint64_t numberOfDistributedQueries{0};
166 /* handle the web server, carbon, statistics and the control channel */
167 bool isHandler{false};
168 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
169 bool isListener{false};
170 /* process queries */
171 bool isWorker{false};
172 };
173
/* first we have the handler thread, t_id == 0 (some other
   helper threads like SNMP might have t_id == 0 as well)
   then the distributor threads if any
   and finally the workers */
static std::vector<RecThreadInfo> s_threadInfos; // indexed by t_id, see isDistributorThread() / isHandlerThread()
/* without reuseport, all listeners share the same sockets */
static deferredAdd_t g_deferredAdds;

typedef vector<int> tcpListenSockets_t;
typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now

// Wildcard addresses used by getQueryLocalAddress() when no local query address is configured.
static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
// Candidate source addresses for outgoing queries; getQueryLocalAddress() picks one at random.
static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
static AtomicCounter counter;
static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
static NetmaskGroup g_XPFAcl;
static size_t g_tcpMaxQueriesPerConn;
static size_t s_maxUDPQueriesPerRound;
static uint64_t g_latencyStatSize;
static uint32_t g_disthashseed;
static unsigned int g_maxTCPPerClient;
static unsigned int g_maxMThreads;
static unsigned int g_numDistributorThreads;
static unsigned int g_numWorkerThreads;
static int g_tcpTimeout;
static uint16_t g_udpTruncationThreshold; // caps the maximum UDP answer size computed in startDoResolve()
static uint16_t g_xpfRRCode{0};
static std::atomic<bool> statsWanted;
static std::atomic<bool> g_quiet;
static bool g_logCommonErrors;
static bool g_anyToTcp;
static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
static bool g_reusePort{false};
static bool g_gettagNeedsEDNSOptions{false};
static time_t g_statisticsInterval;
static bool g_useIncomingECS;
std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
#ifdef NOD_ENABLED
static bool g_nodEnabled;
static DNSName g_nodLookupDomain; // suffix appended to a new domain for the lookup done in nodCheckNewDomain(); root disables the lookup
static bool g_nodLog; // log newly observed domains
static SuffixMatchNode g_nodDomainWL; // domains matched here are skipped by the NOD functions
static std::string g_nod_pbtag;
static bool g_udrEnabled;
static bool g_udrLog; // log unique responses
static std::string g_udr_pbtag;
#endif /* NOD_ENABLED */
// Source ports that UDPClientSocks::makeClientSocket() never picks.
#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
#else
static std::set<uint16_t> s_avoidUdpSourcePorts;
#endif
// Inclusive range from which UDPClientSocks::makeClientSocket() draws random source ports.
static uint16_t s_minUdpSourcePort;
static uint16_t s_maxUdpSourcePort;
static double s_balancingFactor;

RecursorControlChannel s_rcc; // only active in the handler thread
RecursorStats g_stats;
string s_programname="pdns_recursor";
string s_pidfname; // path written by writePid()
bool g_lowercaseOutgoing;
unsigned int g_networkTimeoutMsec; // timeout passed to every MT->waitEvent() call in this file
unsigned int g_numThreads;
uint16_t g_outgoingEDNSBufsize;
bool g_logRPZChanges{false};

#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Concatenation of the two lists above.
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
252
253 //! used to send information to a newborn mthread
254 struct DNSComboWriter {
255 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
256 {
257 }
258
259 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
260 {
261 }
262
263 void setRemote(const ComboAddress& sa)
264 {
265 d_remote=sa;
266 }
267
268 void setSource(const ComboAddress& sa)
269 {
270 d_source=sa;
271 }
272
273 void setLocal(const ComboAddress& sa)
274 {
275 d_local=sa;
276 }
277
278 void setDestination(const ComboAddress& sa)
279 {
280 d_destination=sa;
281 }
282
283 void setSocket(int sock)
284 {
285 d_socket=sock;
286 }
287
288 string getRemote() const
289 {
290 if (d_source == d_remote) {
291 return d_source.toStringWithPort();
292 }
293 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
294 }
295
296 MOADNSParser d_mdp;
297 struct timeval d_now;
298 /* Remote client, might differ from d_source
299 in case of XPF, in which case d_source holds
300 the IP of the client and d_remote of the proxy
301 */
302 ComboAddress d_remote;
303 ComboAddress d_source;
304 /* Destination address, might differ from
305 d_destination in case of XPF, in which case
306 d_destination holds the IP of the proxy and
307 d_local holds our own. */
308 ComboAddress d_local;
309 ComboAddress d_destination;
310 #ifdef HAVE_PROTOBUF
311 boost::uuids::uuid d_uuid;
312 string d_requestorId;
313 string d_deviceId;
314 #endif
315 std::string d_query;
316 std::vector<std::string> d_policyTags;
317 LuaContext::LuaObject d_data;
318 EDNSSubnetOpts d_ednssubnet;
319 shared_ptr<TCPConnection> d_tcpConnection;
320 int d_socket;
321 unsigned int d_tag{0};
322 uint32_t d_qhash{0};
323 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
324 uint16_t d_ecsBegin{0};
325 uint16_t d_ecsEnd{0};
326 bool d_variable{false};
327 bool d_ecsFound{false};
328 bool d_ecsParsed{false};
329 bool d_tcp;
330 };
331
332 MT_t* getMT()
333 {
334 return MT ? MT.get() : nullptr;
335 }
336
337 ArgvMap &arg()
338 {
339 static ArgvMap theArg;
340 return theArg;
341 }
342
343 unsigned int getRecursorThreadId()
344 {
345 return t_id;
346 }
347
348 int getMTaskerTID()
349 {
350 return MT->getTid();
351 }
352
353 static bool isDistributorThread()
354 {
355 if (t_id == 0) {
356 return false;
357 }
358
359 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
360 }
361
362 static bool isHandlerThread()
363 {
364 if (t_id == 0) {
365 return true;
366 }
367
368 return s_threadInfos.at(t_id).isHandler;
369 }
370
371 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
372
373 // -1 is error, 0 is timeout, 1 is success
374 int asendtcp(const string& data, Socket* sock)
375 {
376 PacketID pident;
377 pident.sock=sock;
378 pident.outMSG=data;
379
380 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
381 string packet;
382
383 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
384
385 if(!ret || ret==-1) { // timeout
386 t_fdm->removeWriteFD(sock->getHandle());
387 }
388 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
389 return -1;
390 }
391 return ret;
392 }
393
394 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
395
396 // -1 is error, 0 is timeout, 1 is success
397 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
398 {
399 data.clear();
400 PacketID pident;
401 pident.sock=sock;
402 pident.inNeeded=len;
403 pident.inIncompleteOkay=incompleteOkay;
404 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
405
406 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
407 if(!ret || ret==-1) { // timeout
408 t_fdm->removeReadFD(sock->getHandle());
409 }
410 else if(data.empty()) {// error, EOF or other
411 return -1;
412 }
413
414 return ret;
415 }
416
417 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
418 {
419 PacketID pident=*any_cast<PacketID>(&var);
420 char resp[512];
421 ComboAddress fromaddr;
422 socklen_t addrlen=sizeof(fromaddr);
423
424 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
425 if (fromaddr != pident.remote) {
426 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
427
428 }
429
430 t_fdm->removeReadFD(fd);
431 if(ret >= 0) {
432 string data(resp, (size_t) ret);
433 MT->sendEvent(pident, &data);
434 }
435 else {
436 string empty;
437 MT->sendEvent(pident, &empty);
438 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
439 }
440 }
441 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
442 {
443 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
444 s.setNonBlocking();
445 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
446
447 s.bind(local);
448 s.connect(dest);
449 s.send(query);
450
451 PacketID pident;
452 pident.sock=&s;
453 pident.remote=dest;
454 pident.type=0;
455 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
456
457 string data;
458
459 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
460
461 if(!ret || ret==-1) { // timeout
462 t_fdm->removeReadFD(s.getHandle());
463 }
464 else if(data.empty()) {// error, EOF or other
465 // we could special case this
466 return data;
467 }
468 return data;
469 }
470
471 //! pick a random query local address
472 ComboAddress getQueryLocalAddress(int family, uint16_t port)
473 {
474 ComboAddress ret;
475 if(family==AF_INET) {
476 if(g_localQueryAddresses4.empty())
477 ret = g_local4;
478 else
479 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
480 ret.sin4.sin_port = htons(port);
481 }
482 else {
483 if(g_localQueryAddresses6.empty())
484 ret = g_local6;
485 else
486 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
487
488 ret.sin6.sin6_port = htons(port);
489 }
490 return ret;
491 }
492
493 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
494
495 static void setSocketBuffer(int fd, int optname, uint32_t size)
496 {
497 uint32_t psize=0;
498 socklen_t len=sizeof(psize);
499
500 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
501 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
502 return;
503 }
504
505 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
506 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
507 }
508
509
510 static void setSocketReceiveBuffer(int fd, uint32_t size)
511 {
512 setSocketBuffer(fd, SO_RCVBUF, size);
513 }
514
515 static void setSocketSendBuffer(int fd, uint32_t size)
516 {
517 setSocketBuffer(fd, SO_SNDBUF, size);
518 }
519
520
521 // you can ask this class for a UDP socket to send a query from
522 // this socket is not yours, don't even think about deleting it
523 // but after you call 'returnSocket' on it, don't assume anything anymore
524 class UDPClientSocks
525 {
526 unsigned int d_numsocks;
527 public:
528 UDPClientSocks() : d_numsocks(0)
529 {
530 }
531
532 typedef set<int> socks_t;
533 socks_t d_socks;
534
535 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
536 int getSocket(const ComboAddress& toaddr, int* fd)
537 {
538 *fd=makeClientSocket(toaddr.sin4.sin_family);
539 if(*fd < 0) // temporary error - receive exception otherwise
540 return -2;
541
542 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
543 int err = errno;
544 // returnSocket(*fd);
545 try {
546 closesocket(*fd);
547 }
548 catch(const PDNSException& e) {
549 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
550 }
551
552 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
553 return -2;
554 return -1;
555 }
556
557 d_socks.insert(*fd);
558 d_numsocks++;
559 return 0;
560 }
561
562 void returnSocket(int fd)
563 {
564 socks_t::iterator i=d_socks.find(fd);
565 if(i==d_socks.end()) {
566 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
567 }
568 returnSocketLocked(i);
569 }
570
571 // return a socket to the pool, or simply erase it
572 void returnSocketLocked(socks_t::iterator& i)
573 {
574 if(i==d_socks.end()) {
575 throw PDNSException("Trying to return a socket not in the pool");
576 }
577 try {
578 t_fdm->removeReadFD(*i);
579 }
580 catch(FDMultiplexerException& e) {
581 // we sometimes return a socket that has not yet been assigned to t_fdm
582 }
583 try {
584 closesocket(*i);
585 }
586 catch(const PDNSException& e) {
587 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
588 }
589
590 d_socks.erase(i++);
591 --d_numsocks;
592 }
593
594 // returns -1 for errors which might go away, throws for ones that won't
595 static int makeClientSocket(int family)
596 {
597 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
598
599 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
600 return ret;
601
602 if(ret<0)
603 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
604
605 // setCloseOnExec(ret); // we're not going to exec
606
607 int tries=10;
608 ComboAddress sin;
609 while(--tries) {
610 uint16_t port;
611
612 if(tries==1) // fall back to kernel 'random'
613 port = 0;
614 else {
615 do {
616 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
617 }
618 while (s_avoidUdpSourcePorts.count(port));
619 }
620
621 sin=getQueryLocalAddress(family, port); // does htons for us
622
623 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
624 break;
625 }
626 if(!tries)
627 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
628
629 setReceiveSocketErrors(ret, family);
630 setNonBlocking(ret);
631 return ret;
632 }
633 };
634
635 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
636
637 /* these two functions are used by LWRes */
638 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
639 int asendto(const char *data, size_t len, int flags,
640 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
641 {
642
643 PacketID pident;
644 pident.domain = domain;
645 pident.remote = toaddr;
646 pident.type = qtype;
647
648 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
649 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
650
651 for(; chain.first != chain.second; chain.first++) {
652 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
653 /*
654 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
655 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
656 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
657 */
658 chain.first->key.chain.insert(id); // we can chain
659 *fd=-1; // gets used in waitEvent / sendEvent later on
660 return 1;
661 }
662 }
663
664 int ret=t_udpclientsocks->getSocket(toaddr, fd);
665 if(ret < 0)
666 return ret;
667
668 pident.fd=*fd;
669 pident.id=id;
670
671 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
672 ret = send(*fd, data, len, 0);
673
674 int tmp = errno;
675
676 if(ret < 0)
677 t_udpclientsocks->returnSocket(*fd);
678
679 errno = tmp; // this is for logging purposes only
680 return ret;
681 }
682
683 // -1 is error, 0 is timeout, 1 is success
684 int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
685 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
686 {
687 static optional<unsigned int> nearMissLimit;
688 if(!nearMissLimit)
689 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
690
691 PacketID pident;
692 pident.fd=fd;
693 pident.id=id;
694 pident.domain=domain;
695 pident.type = qtype;
696 pident.remote=fromaddr;
697
698 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
699
700 if(ret > 0) {
701 if(packet.empty()) // means "error"
702 return -1;
703
704 *d_len=packet.size();
705
706 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
707 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
708 g_stats.spoofCount++;
709 return -1;
710 }
711 }
712 else {
713 if(fd >= 0)
714 t_udpclientsocks->returnSocket(fd);
715 }
716 return ret;
717 }
718
719 static void writePid(void)
720 {
721 if(!::arg().mustDo("write-pid"))
722 return;
723 ofstream of(s_pidfname.c_str(), std::ios_base::app);
724 if(of)
725 of<< Utility::getpid() <<endl;
726 else
727 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
728 }
729
730 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
731 {
732 ++s_currentConnections;
733 (*t_tcpClientCounts)[d_remote]++;
734 }
735
736 TCPConnection::~TCPConnection()
737 {
738 try {
739 if(closesocket(d_fd) < 0)
740 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
741 }
742 catch(const PDNSException& e) {
743 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
744 }
745
746 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
747 t_tcpClientCounts->erase(d_remote);
748 --s_currentConnections;
749 }
750
751 AtomicCounter TCPConnection::s_currentConnections;
752
753 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
754
755 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
756 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
757 {
758 if(packetsize > 1000 && t_largeanswerremotes)
759 t_largeanswerremotes->push_back(remote);
760 switch(res) {
761 case RCode::ServFail:
762 if(t_servfailremotes) {
763 t_servfailremotes->push_back(remote);
764 if(query && t_servfailqueryring) // packet cache
765 t_servfailqueryring->push_back(make_pair(*query, qtype));
766 }
767 g_stats.servFails++;
768 break;
769 case RCode::NXDomain:
770 g_stats.nxDomains++;
771 break;
772 case RCode::NoError:
773 g_stats.noErrors++;
774 break;
775 }
776 }
777
778 static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
779 try
780 {
781 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
782 }
783 catch(...)
784 {
785 return "Exception making error message for exception";
786 }
787
788 #ifdef HAVE_PROTOBUF
789 static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
790 {
791 if (!t_protobufServers) {
792 return;
793 }
794
795 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
796 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
797 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
798 message.setServerIdentity(SyncRes::s_serverID);
799 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
800 message.setRequestorId(requestorId);
801 message.setDeviceId(deviceId);
802
803 if (!policyTags.empty()) {
804 message.setPolicyTags(policyTags);
805 }
806
807 // cerr <<message.toDebugString()<<endl;
808 std::string str;
809 message.serialize(str);
810
811 for (auto& server : *t_protobufServers) {
812 server->queueData(str);
813 }
814 }
815
816 static void protobufLogResponse(const RecProtoBufMessage& message)
817 {
818 if (!t_protobufServers) {
819 return;
820 }
821
822 // cerr <<message.toDebugString()<<endl;
823 std::string str;
824 message.serialize(str);
825
826 for (auto& server : *t_protobufServers) {
827 server->queueData(str);
828 }
829 }
830 #endif
831
832 /**
833 * Chases the CNAME provided by the PolicyCustom RPZ policy.
834 *
835 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
836 * @param qtype: The QType of the original query
837 * @param sr: A SyncRes
838 * @param res: An integer that will contain the RCODE of the lookup we do
839 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
840 */
841 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
842 {
843 if (spoofed.d_type == QType::CNAME) {
844 bool oldWantsRPZ = sr.getWantsRPZ();
845 sr.setWantsRPZ(false);
846 vector<DNSRecord> ans;
847 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
848 for (const auto& rec : ans) {
849 if(rec.d_place == DNSResourceRecord::ANSWER) {
850 ret.push_back(rec);
851 }
852 }
853 // Reset the RPZ state of the SyncRes
854 sr.setWantsRPZ(oldWantsRPZ);
855 }
856 }
857
858 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
859 {
860 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
861
862 if(rec.d_type != QType::OPT) // their TTL ain't real
863 minTTL = min(minTTL, rec.d_ttl);
864
865 rec.d_content->toPacket(pw);
866 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
867 pw.rollback();
868 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
869 pw.getHeader()->tc=1;
870 pw.truncate();
871 }
872 return false;
873 }
874
875 return true;
876 }
877
878 #ifdef HAVE_PROTOBUF
879 static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
880 {
881 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
882
883 for (const auto& server : config.servers) {
884 try {
885 result->emplace_back(new RemoteLogger(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
886 }
887 catch(const std::exception& e) {
888 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
889 }
890 catch(const PDNSException& e) {
891 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
892 }
893 }
894
895 return result;
896 }
897
898 static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
899 {
900 if (!luaconfsLocal->protobufExportConfig.enabled) {
901 if (t_protobufServers) {
902 for (auto& server : *t_protobufServers) {
903 server->stop();
904 }
905 t_protobufServers.reset();
906 }
907
908 return false;
909 }
910
911 /* if the server was not running, or if it was running according to a
912 previous configuration */
913 if (!t_protobufServers ||
914 t_protobufServersGeneration < luaconfsLocal->generation) {
915
916 if (t_protobufServers) {
917 for (auto& server : *t_protobufServers) {
918 server->stop();
919 }
920 }
921 t_protobufServers.reset();
922
923 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
924 t_protobufServersGeneration = luaconfsLocal->generation;
925 }
926
927 return true;
928 }
929
930 static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
931 {
932 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
933 if (t_outgoingProtobufServers) {
934 for (auto& server : *t_outgoingProtobufServers) {
935 server->stop();
936 }
937 }
938 t_outgoingProtobufServers.reset();
939
940 return false;
941 }
942
943 /* if the server was not running, or if it was running according to a
944 previous configuration */
945 if (!t_outgoingProtobufServers ||
946 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
947
948 if (t_outgoingProtobufServers) {
949 for (auto& server : *t_outgoingProtobufServers) {
950 server->stop();
951 }
952 }
953 t_outgoingProtobufServers.reset();
954
955 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
956 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
957 }
958
959 return true;
960 }
961 #endif /* HAVE_PROTOBUF */
962
963 #ifdef NOD_ENABLED
964 static bool nodCheckNewDomain(const DNSName& dname)
965 {
966 static const QType qt(QType::A);
967 static const uint16_t qc(QClass::IN);
968 bool ret = false;
969 // First check the (sub)domain isn't whitelisted for NOD purposes
970 if (!g_nodDomainWL.check(dname)) {
971 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
972 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
973 if (g_nodLog) {
974 // This should probably log to a dedicated log file
975 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
976 }
977 if (!(g_nodLookupDomain.isRoot())) {
978 // Send a DNS A query to <domain>.g_nodLookupDomain
979 DNSName qname = dname;
980 vector<DNSRecord> dummy;
981 qname += g_nodLookupDomain;
982 directResolve(qname, qt, qc, dummy);
983 }
984 ret = true;
985 }
986 }
987 return ret;
988 }
989
990 static void nodAddDomain(const DNSName& dname)
991 {
992 // Don't bother adding domains on the nod whitelist
993 if (!g_nodDomainWL.check(dname)) {
994 if (t_nodDBp) {
995 // This keeps the nod info up to date
996 t_nodDBp->addDomain(dname);
997 }
998 }
999 }
1000
1001 static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1002 {
1003 bool ret = false;
1004 if (record.d_place == DNSResourceRecord::ANSWER ||
1005 record.d_place == DNSResourceRecord::ADDITIONAL) {
1006 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1007 std::stringstream ss;
1008 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1009 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
1010 if (g_udrLog) {
1011 // This should also probably log to a dedicated file.
1012 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
1013 }
1014 ret = true;
1015 }
1016 }
1017 return ret;
1018 }
1019 #endif /* NOD_ENABLED */
1020
/* Handles one client question from start to finish.
   `p` points to a DNSComboWriter; ownership is taken here by wrapping it in a
   unique_ptr. The function:
     - parses the EDNS options of the query (version, payload size, ECS, NSID),
     - prepares the optional protobuf message and the response packet header,
     - runs the Lua hooks (prerpz, preresolve, nodata, nxdomain, postresolve) and
       the RPZ policies around SyncRes::beginResolve(),
     - applies the DNSSEC validation state to the answer,
     - writes the records and the OPT record into the packet,
     - sends the packet over UDP (and possibly inserts it into the packet cache)
       or over TCP (and re-arms the connection for the next query),
     - updates the latency and cache statistics.
   Every exception is caught and logged at the end; nothing propagates to the caller.
   Note that `goto haveAnswer` and `goto sendit` jump forward inside the try block,
   which is why the locals they jump over are declared early. */
1021 static void startDoResolve(void *p)
1022 {
1023 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
1024 try {
1025 if (t_queryring)
1026 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1027
     // Default answer size limit: 65535 over TCP, otherwise 512 or the UDP truncation threshold, whichever is lower
1028 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
1029 EDNSOpts edo;
1030 std::vector<pair<uint16_t, string> > ednsOpts;
1031 bool variableAnswer = dc->d_variable;
1032 bool haveEDNS=false;
1033 #ifdef NOD_ENABLED
1034 bool hasUDR = false;
1035 #endif /* NOD_ENABLED */
1036 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1037 uint8_t ednsExtRCode = 0;
1038 if(getEDNSOpts(dc->d_mdp, &edo)) {
1039 haveEDNS=true;
1040 if (edo.d_version != 0) {
1041 ednsExtRCode = ERCode::BADVERS;
1042 }
1043
1044 if(!dc->d_tcp) {
1045 /* rfc6891 6.2.3:
1046 "Values lower than 512 MUST be treated as equal to 512."
1047 */
1048 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1049 }
1050 ednsOpts = edo.d_options;
1051 maxanswersize -= 11; // EDNS header size
1052
1053 for (const auto& o : edo.d_options) {
1054 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1055 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1056 } else if (o.first == EDNSOptionCode::NSID) {
1057 const static string mode_server_id = ::arg()["server-id"];
1058 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1059 maxanswersize > (2 + 2 + mode_server_id.size())) {
1060 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1061 variableAnswer = true; // Can't packetcache an answer with NSID
1062 // Option Code and Option Length are both 2
1063 maxanswersize -= 2 + 2 + mode_server_id.size();
1064 }
1065 }
1066 }
1067 }
1068 /* perhaps there was no EDNS or no ECS but by now we looked */
1069 dc->d_ecsParsed = true;
1070 vector<DNSRecord> ret;
1071 vector<uint8_t> packet;
1072
1073 auto luaconfsLocal = g_luaconfs.getLocal();
1074 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1075 bool wantsRPZ(true);
1076 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
1077 bool logResponse = false;
1078 #ifdef HAVE_PROTOBUF
1079 if (checkProtobufExport(luaconfsLocal)) {
1080 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
     // The requestor address is masked with the configured v4/v6 mask before it goes into the message
1081 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1082 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1083 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
1084 pbMessage->setServerIdentity(SyncRes::s_serverID);
1085 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1086 }
1087 #endif /* HAVE_PROTOBUF */
1088
1089 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
1090
     // Response header: a recursive (ra=1), non-authoritative (aa=0) answer that echoes the query's id, rd and cd
1091 pw.getHeader()->aa=0;
1092 pw.getHeader()->ra=1;
1093 pw.getHeader()->qr=1;
1094 pw.getHeader()->tc=0;
1095 pw.getHeader()->id=dc->d_mdp.d_header.id;
1096 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
1097 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
1098
1099 /* This is the lowest TTL seen in the records of the response,
1100 so we can't cache it for longer than this value.
1101 If we have a TTL cap, this value can't be larger than the
1102 cap no matter what. */
1103 uint32_t minTTL = dc->d_ttlCap;
1104
1105 SyncRes sr(dc->d_now);
1106
1107 bool DNSSECOK=false;
1108 if(t_pdl) {
1109 sr.setLuaEngine(t_pdl);
1110 }
1111 if(g_dnssecmode != DNSSECMode::Off) {
1112 sr.setDoDNSSEC(true);
1113
1114 // Does the requestor want DNSSEC records?
1115 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
1116 DNSSECOK=true;
1117 g_stats.dnssecQueries++;
1118 }
1119 if (dc->d_mdp.d_header.cd) {
1120 /* Per rfc6840 section 5.9, "When processing a request with
1121 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1122 to return all response data, even data that has failed DNSSEC
1123 validation. */
1124 ++g_stats.dnssecCheckDisabledQueries;
1125 }
1126 if (dc->d_mdp.d_header.ad) {
1127 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1128 indicating that the requester understands and is interested in the
1129 value of the AD bit in the response. This allows a requester to
1130 indicate that it understands the AD bit without also requesting
1131 DNSSEC data via the DO bit. */
1132 ++g_stats.dnssecAuthenticDataQueries;
1133 }
1134 } else {
1135 // Ignore the client-set CD flag
1136 pw.getHeader()->cd=0;
1137 }
     // Validation is requested in ValidateAll and ValidateForLog modes, and in Process mode only when the client set AD or DO
1138 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1139
1140 #ifdef HAVE_PROTOBUF
1141 sr.setInitialRequestId(dc->d_uuid);
1142 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
1143 #endif
1144
1145 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
1146
1147 bool tracedQuery=false; // we could consider letting Lua know about this too
1148 bool shouldNotValidate = false;
1149
1150 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1151 int res = RCode::NoError;
1152 DNSFilterEngine::Policy appliedPolicy;
1153 std::vector<DNSRecord> spoofed;
     // Object handed to the Lua hooks; several of its members point at locals of this function
1154 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
1155 dq.ednsFlags = &edo.d_extFlags;
1156 dq.ednsOptions = &ednsOpts;
1157 dq.tag = dc->d_tag;
1158 dq.discardedPolicies = &sr.d_discardedPolicies;
1159 dq.policyTags = &dc->d_policyTags;
1160 dq.appliedPolicy = &appliedPolicy;
1161 dq.currentRecords = &ret;
1162 dq.dh = &dc->d_mdp.d_header;
1163 dq.data = dc->d_data;
1164 #ifdef HAVE_PROTOBUF
1165 dq.requestorId = dc->d_requestorId;
1166 dq.deviceId = dc->d_deviceId;
1167 #endif
1168
     // An EDNS extended rcode was set above (BADVERS): skip resolution, the OPT record added at 'sendit' carries it
1169 if(ednsExtRCode != 0) {
1170 goto sendit;
1171 }
1172
     // ANY query over UDP while g_anyToTcp is set: answer with an empty packet that has the TC bit set
1173 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
1174 pw.getHeader()->tc = 1;
1175 res = 0;
1176 variableAnswer = true;
1177 goto sendit;
1178 }
1179
1180 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
1181 sr.setLogMode(SyncRes::Store);
1182 tracedQuery=true;
1183 }
1184
1185
1186 if(!g_quiet || tracedQuery) {
1187 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
1188 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
1189 if(!dc->d_ednssubnet.source.empty()) {
1190 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
1191 }
1192 g_log<<endl;
1193 }
1194
1195 sr.setId(MT->getTid());
     // Without the RD bit only the cache is consulted
1196 if(!dc->d_mdp.d_header.rd)
1197 sr.setCacheOnly();
1198
1199 if (t_pdl) {
1200 t_pdl->prerpz(dq, res);
1201 }
1202
1203 // Check if the query has a policy attached to it
1204 if (wantsRPZ) {
1205 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
1206 }
1207
1208 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1209 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
1210
1211 sr.setWantsRPZ(wantsRPZ);
1212 if(wantsRPZ) {
1213 switch(appliedPolicy.d_kind) {
1214 case DNSFilterEngine::PolicyKind::NoAction:
1215 break;
1216 case DNSFilterEngine::PolicyKind::Drop:
1217 g_stats.policyDrops++;
1218 g_stats.policyResults[appliedPolicy.d_kind]++;
1219 return;
1220 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1221 g_stats.policyResults[appliedPolicy.d_kind]++;
1222 res=RCode::NXDomain;
1223 goto haveAnswer;
1224 case DNSFilterEngine::PolicyKind::NODATA:
1225 g_stats.policyResults[appliedPolicy.d_kind]++;
1226 res=RCode::NoError;
1227 goto haveAnswer;
1228 case DNSFilterEngine::PolicyKind::Custom:
1229 g_stats.policyResults[appliedPolicy.d_kind]++;
1230 res=RCode::NoError;
1231 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1232 for (const auto& dr : spoofed) {
1233 ret.push_back(dr);
1234 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1235 }
1236 goto haveAnswer;
1237 case DNSFilterEngine::PolicyKind::Truncate:
     // Truncate only applies to UDP; over TCP the query is resolved normally
1238 if(!dc->d_tcp) {
1239 g_stats.policyResults[appliedPolicy.d_kind]++;
1240 res=RCode::NoError;
1241 pw.getHeader()->tc=1;
1242 goto haveAnswer;
1243 }
1244 break;
1245 }
1246 }
1247
1248 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
1249 try {
1250 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
1251 shouldNotValidate = sr.wasOutOfBand();
1252 }
1253 catch(ImmediateServFailException &e) {
1254 if(g_logCommonErrors)
1255 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
1256 res = RCode::ServFail;
1257 }
1258
1259 dq.validationState = sr.getValidationState();
1260
1261 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1262 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1263 appliedPolicy = sr.d_appliedPolicy;
1264 g_stats.policyResults[appliedPolicy.d_kind]++;
1265 switch(appliedPolicy.d_kind) {
1266 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1267 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1268 case DNSFilterEngine::PolicyKind::Drop:
1269 g_stats.policyDrops++;
1270 return;
1271 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1272 ret.clear();
1273 res=RCode::NXDomain;
1274 goto haveAnswer;
1275
1276 case DNSFilterEngine::PolicyKind::NODATA:
1277 ret.clear();
1278 res=RCode::NoError;
1279 goto haveAnswer;
1280
1281 case DNSFilterEngine::PolicyKind::Truncate:
1282 if(!dc->d_tcp) {
1283 ret.clear();
1284 res=RCode::NoError;
1285 pw.getHeader()->tc=1;
1286 goto haveAnswer;
1287 }
1288 break;
1289
1290 case DNSFilterEngine::PolicyKind::Custom:
1291 ret.clear();
1292 res=RCode::NoError;
1293 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1294 for (const auto& dr : spoofed) {
1295 ret.push_back(dr);
1296 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1297 }
1298 goto haveAnswer;
1299 }
1300 }
1301
     // Policy lookup based on the records that were collected in 'ret'
1302 if (wantsRPZ) {
1303 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
1304 }
1305
     // Lua hooks after resolution; when a hook returns true, validation is skipped further down
1306 if(t_pdl) {
1307 if(res == RCode::NoError) {
     // nodata() is only called when no record of the requested type is present in the answer section
1308 auto i=ret.cbegin();
1309 for(; i!= ret.cend(); ++i)
1310 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1311 break;
1312 if(i == ret.cend() && t_pdl->nodata(dq, res))
1313 shouldNotValidate = true;
1314
1315 }
1316 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
1317 shouldNotValidate = true;
1318
1319 if(t_pdl->postresolve(dq, res))
1320 shouldNotValidate = true;
1321 }
1322
1323 if (wantsRPZ) { //XXX This block is repeated, see above
1324 g_stats.policyResults[appliedPolicy.d_kind]++;
1325 switch(appliedPolicy.d_kind) {
1326 case DNSFilterEngine::PolicyKind::NoAction:
1327 break;
1328 case DNSFilterEngine::PolicyKind::Drop:
1329 g_stats.policyDrops++;
1330 return;
1331 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1332 ret.clear();
1333 res=RCode::NXDomain;
1334 goto haveAnswer;
1335
1336 case DNSFilterEngine::PolicyKind::NODATA:
1337 ret.clear();
1338 res=RCode::NoError;
1339 goto haveAnswer;
1340
1341 case DNSFilterEngine::PolicyKind::Truncate:
1342 if(!dc->d_tcp) {
1343 ret.clear();
1344 res=RCode::NoError;
1345 pw.getHeader()->tc=1;
1346 goto haveAnswer;
1347 }
1348 break;
1349
1350 case DNSFilterEngine::PolicyKind::Custom:
1351 ret.clear();
1352 res=RCode::NoError;
1353 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1354 for (const auto& dr : spoofed) {
1355 ret.push_back(dr);
1356 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1357 }
1358 goto haveAnswer;
1359 }
1360 }
1361 }
     // From here on 'res' and 'ret' hold the outcome, whether it came from a policy, a Lua hook or the resolution itself
1362 haveAnswer:;
1363 if(res == PolicyDecision::DROP) {
1364 g_stats.policyDrops++;
1365 return;
1366 }
     // Dump the stored trace (if any) for traced queries and for failures
1367 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1368 {
1369 string trace(sr.getTrace());
1370 if(!trace.empty()) {
1371 vector<string> lines;
1372 boost::split(lines, trace, boost::is_any_of("\n"));
1373 for(const string& line : lines) {
1374 if(!line.empty())
1375 g_log<<Logger::Warning<< line << endl;
1376 }
1377 }
1378 }
1379
     // A result of -1 is reported to the client as ServFail, without any record
1380 if(res == -1) {
1381 pw.getHeader()->rcode=RCode::ServFail;
1382 // no commit here, because no record
1383 g_stats.servFails++;
1384 }
1385 else {
1386 pw.getHeader()->rcode=res;
1387
1388 // Does the validation mode or query demand validation?
1389 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1390 try {
1391 if(sr.doLog()) {
1392 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
1393 }
1394
1395 auto state = sr.getValidationState();
1396
1397 if(state == Secure) {
1398 if(sr.doLog()) {
1399 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
1400 }
1401
1402 // Is the query source interested in the value of the ad-bit?
1403 if (dc->d_mdp.d_header.ad || DNSSECOK)
1404 pw.getHeader()->ad=1;
1405 }
1406 else if(state == Insecure) {
1407 if(sr.doLog()) {
1408 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
1409 }
1410
1411 pw.getHeader()->ad=0;
1412 }
1413 else if(state == Bogus) {
1414 if(t_bogusremotes)
1415 t_bogusremotes->push_back(dc->d_source);
1416 if(t_bogusqueryring)
1417 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1418 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1419 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
1420 }
1421
1422 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1423 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1424 if(sr.doLog()) {
1425 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1426 }
1427
1428 pw.getHeader()->rcode=RCode::ServFail;
1429 goto sendit;
1430 } else {
1431 if(sr.doLog()) {
1432 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1433 }
1434 }
1435 }
1436 }
1437 catch(ImmediateServFailException &e) {
1438 if(g_logCommonErrors)
1439 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1440 pw.getHeader()->rcode=RCode::ServFail;
1441 goto sendit;
1442 }
1443 }
1444
1445 if(ret.size()) {
1446 orderAndShuffle(ret);
     // A sortlist matching the client makes the answer client-specific, so it is flagged as variable
1447 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
1448 stable_sort(ret.begin(), ret.end(), *sl);
1449 variableAnswer=true;
1450 }
1451 }
1452
1453 bool needCommit = false;
     // Copy the records into the packet. When the client did not set the DO bit, NSEC3 records are
     // always left out, and RRSIG/NSEC records are left out unless they sit in the answer section
     // of a query for that very type or for ANY.
1454 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1455 if( ! DNSSECOK &&
1456 ( i->d_type == QType::NSEC3 ||
1457 (
1458 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1459 (
1460 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1461 i->d_place != DNSResourceRecord::ANSWER
1462 )
1463 )
1464 )
1465 ) {
1466 continue;
1467 }
1468
     // addRecordToPacket() returning false stops the loop, and the pending records are not committed below
1469 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1470 needCommit = false;
1471 break;
1472 }
1473 needCommit = true;
1474
1475 #ifdef NOD_ENABLED
1476 bool udr = false;
1477 if (g_udrEnabled) {
1478 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
1479 if (!hasUDR && udr)
1480 hasUDR = true;
1481 }
1482 #endif /* NOD ENABLED */
1483
1484 #ifdef HAVE_PROTOBUF
1485 if (t_protobufServers) {
1486 #ifdef NOD_ENABLED
1487 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1488 #else
1489 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
1490 #endif /* NOD_ENABLED */
1491 }
1492 #endif
1493 }
1494 if(needCommit)
1495 pw.commit();
1496 }
     // Also reached directly via goto when no records are to be added: EDNS version error,
     // ANY-over-UDP truncation, or a ServFail decided during validation
1497 sendit:;
1498
1499 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
1500 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
1501 EDNSSubnetOpts eo;
1502 eo.source = dc->d_ednssubnet.source;
1503 ComboAddress sa;
1504 sa.reset();
1505 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1506 eo.scope = Netmask(sa, 0);
1507
1508 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1509 }
1510
1511 if (haveEDNS) {
1512 /* we try to add the EDNS OPT RR even for truncated answers,
1513 as rfc6891 states:
1514 "The minimal response MUST be the DNS header, question section, and an
1515 OPT record. This MUST also occur when a truncated response (using
1516 the DNS header's TC bit) is returned."
1517 */
1518 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1519 pw.commit();
1520 }
1521
1522 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1523 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1524 #ifdef NOD_ENABLED
1525 bool nod = false;
1526 if (g_nodEnabled) {
1527 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1528 nod = true;
1529 }
1530 #endif /* NOD_ENABLED */
1531 #ifdef HAVE_PROTOBUF
     // Log the response, except when only tagged responses are wanted and this one has neither a policy name nor policy tags
1532 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
1533 pbMessage->setBytes(packet.size());
1534 pbMessage->setResponseCode(pw.getHeader()->rcode);
1535 if (appliedPolicy.d_name) {
1536 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1537 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
1538 }
1539 pbMessage->setPolicyTags(dc->d_policyTags);
1540 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1541 pbMessage->setRequestorId(dq.requestorId);
1542 pbMessage->setDeviceId(dq.deviceId);
1543 #ifdef NOD_ENABLED
1544 if (g_nodEnabled) {
1545 if (nod) {
1546 pbMessage->setNOD(true);
1547 pbMessage->addPolicyTag(g_nod_pbtag);
1548 }
1549 if (hasUDR) {
1550 pbMessage->addPolicyTag(g_udr_pbtag);
1551 }
1552 }
1553 #endif /* NOD_ENABLED */
1554 protobufLogResponse(*pbMessage);
1555 #ifdef NOD_ENABLED
     // Undo the NOD/UDR markers after logging; the message may still be moved into the packet cache below
1556 if (g_nodEnabled) {
1557 pbMessage->setNOD(false);
1558 pbMessage->clearUDR();
1559 if (nod)
1560 pbMessage->removePolicyTag(g_nod_pbtag);
1561 if (hasUDR)
1562 pbMessage->removePolicyTag(g_udr_pbtag);
1563 }
1564 #endif /* NOD_ENABLED */
1565 }
1566 #endif
     // UDP: send the reply with sendmsg(), then consider inserting it into the packet cache
1567 if(!dc->d_tcp) {
1568 struct msghdr msgh;
1569 struct iovec iov;
1570 char cbuf[256];
1571 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1572 msgh.msg_control=NULL;
1573
1574 if(g_fromtosockets.count(dc->d_socket)) {
1575 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1576 }
1577 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1578 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
1579
1580 if(variableAnswer || sr.wasVariable()) {
1581 g_stats.variableResponses++;
1582 }
     // Only non-variable answers are cached; ServFail answers get their own TTL, others min(minTTL, packet cache TTL)
1583 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1584 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1585 string((const char*)&*packet.begin(), packet.size()),
1586 g_now.tv_sec,
1587 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1588 min(minTTL,SyncRes::s_packetcachettl),
1589 dq.validationState,
1590 dc->d_ecsBegin,
1591 dc->d_ecsEnd,
1592 std::move(pbMessage));
1593 }
1594 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1595 }
1596 else {
     // TCP: the answer is preceded by its length in two bytes, most significant byte first
1597 char buf[2];
1598 buf[0]=packet.size()/256;
1599 buf[1]=packet.size()%256;
1600
1601 Utility::iovec iov[2];
1602
1603 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1604 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1605
1606 int wret=Utility::writev(dc->d_socket, iov, 2);
1607 bool hadError=true;
1608
     // Anything but a complete write of prefix plus packet counts as an error
1609 if(wret == 0)
1610 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1611 else if(wret < 0 )
1612 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1613 else if((unsigned int)wret != 2 + packet.size())
1614 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1615 else
1616 hadError=false;
1617
1618 // update tcp connection status, either by closing or moving to 'BYTE0'
1619
1620 if(hadError) {
1621 // no need to remove us from FDM, we weren't there
1622 dc->d_socket = -1;
1623 }
1624 else {
1625 dc->d_tcpConnection->queriesCount++;
     // Stop using the connection once the per-connection query limit is reached, otherwise wait for the next query
1626 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1627 dc->d_socket = -1;
1628 }
1629 else {
1630 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1631 Utility::gettimeofday(&g_now, 0); // needs to be updated
1632 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1633 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1634 }
1635 }
1636 }
     // Time spent on this question, in seconds (it is multiplied by 1000 to log milliseconds below)
1637 float spent=makeFloat(sr.getNow()-dc->d_now);
1638 if(!g_quiet) {
1639 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1640 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1641 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1642 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1643
1644 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1645 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
1646 }
1647
1648 g_log<<endl;
1649
1650 }
1651
     // A question that needed outgoing or auth-zone queries counts as a cache miss, anything else as a hit
1652 if (sr.d_outqueries || sr.d_authzonequeries) {
1653 t_RC->cacheMisses++;
1654 }
1655 else {
1656 t_RC->cacheHits++;
1657 }
1658
     // Total response time histogram
1659 if(spent < 0.001)
1660 g_stats.answers0_1++;
1661 else if(spent < 0.010)
1662 g_stats.answers1_10++;
1663 else if(spent < 0.1)
1664 g_stats.answers10_100++;
1665 else if(spent < 1.0)
1666 g_stats.answers100_1000++;
1667 else
1668 g_stats.answersSlow++;
1669
     // Running average of the latency in usec; each sample is capped at the network timeout first
1670 uint64_t newLat=(uint64_t)(spent*1000000);
1671 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1672 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1673 // no worries, we do this for packet cache hits elsewhere
1674
     // 'ourtime' is the total time minus the time accounted to the network (sr.d_totUsec)
1675 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1676 if(ourtime < 1)
1677 g_stats.ourtime0_1++;
1678 else if(ourtime < 2)
1679 g_stats.ourtime1_2++;
1680 else if(ourtime < 4)
1681 g_stats.ourtime2_4++;
1682 else if(ourtime < 8)
1683 g_stats.ourtime4_8++;
1684 else if(ourtime < 16)
1685 g_stats.ourtime8_16++;
1686 else if(ourtime < 32)
1687 g_stats.ourtime16_32++;
1688 else {
1689 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1690 g_stats.ourtimeSlow++;
1691 }
     // Negative values are left out of the average
1692 if(ourtime >= 0.0) {
1693 newLat=ourtime*1000; // usec
1694 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1695 }
1696 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1697 }
1698 catch(PDNSException &ae) {
1699 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1700 }
1701 catch(const MOADNSException &mde) {
1702 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
1703 }
1704 catch(std::exception& e) {
1705 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1706
1707 // Luawrapper nests the exception from Lua, so we unnest it here
1708 try {
1709 std::rethrow_if_nested(e);
1710 } catch(const std::exception& ne) {
1711 g_log<<". Extra info: "<<ne.what();
1712 } catch(...) {}
1713
1714 g_log<<endl;
1715 }
1716 catch(...) {
1717 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1718 }
1719
     // Remember the highest stack usage reported by MT so far
1720 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1721 }
1722
1723 static void makeControlChannelSocket(int processNum=-1)
1724 {
1725 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1726 if(processNum >= 0)
1727 sockname += "."+std::to_string(processNum);
1728 sockname+=".controlsocket";
1729 s_rcc.listen(sockname);
1730
1731 int sockowner = -1;
1732 int sockgroup = -1;
1733
1734 if (!::arg().isEmpty("socket-group"))
1735 sockgroup=::arg().asGid("socket-group");
1736 if (!::arg().isEmpty("socket-owner"))
1737 sockowner=::arg().asUid("socket-owner");
1738
1739 if (sockgroup > -1 || sockowner > -1) {
1740 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1741 unixDie("Failed to chown control socket");
1742 }
1743 }
1744
1745 // do mode change if socket-mode is given
1746 if(!::arg().isEmpty("socket-mode")) {
1747 mode_t sockmode=::arg().asMode("socket-mode");
1748 if(chmod(sockname.c_str(), sockmode) < 0) {
1749 unixDie("Failed to chmod control socket");
1750 }
1751 }
1752 }
1753
1754 static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1755 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
1756 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
1757 {
1758 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
1759 const bool lookForECS = ednssubnet != nullptr;
1760 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
1761 size_t questionLen = question.length();
1762 unsigned int consumed=0;
1763 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1764
1765 size_t pos= sizeof(dnsheader)+consumed+4;
1766 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1767 const uint16_t arcount = ntohs(dh->arcount);
1768
1769 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1770 if (question.at(pos) != 0) {
1771 /* not an OPT or a XPF, bye. */
1772 return;
1773 }
1774
1775 pos += 1;
1776 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1777 pos += sizeof(dnsrecordheader);
1778
1779 if (pos >= questionLen) {
1780 return;
1781 }
1782
1783 /* OPT root label (1) followed by type (2) */
1784 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
1785 if (!options) {
1786 char* ecsStart = nullptr;
1787 size_t ecsLen = 0;
1788 /* we need to pass the record len */
1789 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1790 if (res == 0 && ecsLen > 4) {
1791 EDNSSubnetOpts eso;
1792 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1793 *ednssubnet=eso;
1794 foundECS = true;
1795 }
1796 }
1797 }
1798 else {
1799 /* we need to pass the record len */
1800 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
1801 if (res == 0) {
1802 const auto& it = options->find(EDNSOptionCode::ECS);
1803 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
1804 EDNSSubnetOpts eso;
1805 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
1806 *ednssubnet=eso;
1807 foundECS = true;
1808 }
1809 }
1810 }
1811 }
1812 }
1813 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
1814 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1815 return;
1816 }
1817
1818 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1819 }
1820
1821 pos += ntohs(drh->d_clen);
1822 }
1823 }
1824
1825 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1826 {
1827 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1828
1829 if(conn->state==TCPConnection::BYTE0) {
1830 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
1831 if(bytes==1)
1832 conn->state=TCPConnection::BYTE1;
1833 if(bytes==2) {
1834 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1835 conn->data.resize(conn->qlen);
1836 conn->bytesread=0;
1837 conn->state=TCPConnection::GETQUESTION;
1838 }
1839 if(!bytes || bytes < 0) {
1840 t_fdm->removeReadFD(fd);
1841 return;
1842 }
1843 }
1844 else if(conn->state==TCPConnection::BYTE1) {
1845 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
1846 if(bytes==1) {
1847 conn->state=TCPConnection::GETQUESTION;
1848 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1849 conn->data.resize(conn->qlen);
1850 conn->bytesread=0;
1851 }
1852 if(!bytes || bytes < 0) {
1853 if(g_logCommonErrors)
1854 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
1855 t_fdm->removeReadFD(fd);
1856 return;
1857 }
1858 }
1859 else if(conn->state==TCPConnection::GETQUESTION) {
1860 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
1861 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1862 if(g_logCommonErrors) {
1863 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1864 }
1865 t_fdm->removeReadFD(fd);
1866 return;
1867 }
1868 conn->bytesread+=(uint16_t)bytes;
1869 if(conn->bytesread==conn->qlen) {
1870 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1871
1872 std::unique_ptr<DNSComboWriter> dc;
1873 try {
1874 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
1875 }
1876 catch(const MOADNSException &mde) {
1877 g_stats.clientParseError++;
1878 if(g_logCommonErrors)
1879 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
1880 return;
1881 }
1882 dc->d_tcpConnection = conn; // carry the torch
1883 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1884 dc->d_tcp=true;
1885 dc->setRemote(conn->d_remote);
1886 dc->setSource(conn->d_remote);
1887 ComboAddress dest;
1888 dest.reset();
1889 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1890 socklen_t len = dest.getSocklen();
1891 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1892 dc->setLocal(dest);
1893 dc->setDestination(dest);
1894 DNSName qname;
1895 uint16_t qtype=0;
1896 uint16_t qclass=0;
1897 bool needECS = false;
1898 bool needXPF = g_XPFAcl.match(conn->d_remote);
1899 string requestorId;
1900 string deviceId;
1901 bool logQuery = false;
1902 #ifdef HAVE_PROTOBUF
1903 auto luaconfsLocal = g_luaconfs.getLocal();
1904 if (checkProtobufExport(luaconfsLocal)) {
1905 needECS = true;
1906 }
1907 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
1908 #endif
1909
1910 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
1911
1912 try {
1913 EDNSOptionViewMap ednsOptions;
1914 bool xpfFound = false;
1915 dc->d_ecsParsed = true;
1916 dc->d_ecsFound = false;
1917 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
1918 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1919 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
1920
1921 if(t_pdl) {
1922 try {
1923 if (t_pdl->d_gettag_ffi) {
1924 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
1925 }
1926 else if (t_pdl->d_gettag) {
1927 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1928 }
1929 }
1930 catch(const std::exception& e) {
1931 if(g_logCommonErrors)
1932 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1933 }
1934 }
1935 }
1936 catch(const std::exception& e)
1937 {
1938 if(g_logCommonErrors)
1939 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1940 }
1941 }
1942
1943 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1944
1945 #ifdef HAVE_PROTOBUF
1946 if(t_protobufServers || t_outgoingProtobufServers) {
1947 dc->d_requestorId = requestorId;
1948 dc->d_deviceId = deviceId;
1949 dc->d_uuid = getUniqueID();
1950 }
1951
1952 if(t_protobufServers) {
1953 try {
1954
1955 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
1956 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1957 }
1958 }
1959 catch(std::exception& e) {
1960 if(g_logCommonErrors)
1961 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1962 }
1963 }
1964 #endif
1965 if(t_pdl) {
1966 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
1967 if(!g_quiet)
1968 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
1969 g_stats.policyDrops++;
1970 return;
1971 }
1972 }
1973
1974 if(dc->d_mdp.d_header.qr) {
1975 g_stats.ignoredCount++;
1976 if(g_logCommonErrors) {
1977 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1978 }
1979 return;
1980 }
1981 if(dc->d_mdp.d_header.opcode) {
1982 g_stats.ignoredCount++;
1983 if(g_logCommonErrors) {
1984 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1985 }
1986 return;
1987 }
1988 else if (dh->qdcount == 0) {
1989 g_stats.emptyQueriesCount++;
1990 if(g_logCommonErrors) {
1991 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
1992 }
1993 return;
1994 }
1995 else {
1996 ++g_stats.qcounter;
1997 ++g_stats.tcpqcounter;
1998 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
1999 return;
2000 }
2001 }
2002 }
2003 }
2004
2005 //! Handle new incoming TCP connection
2006 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
2007 {
2008 ComboAddress addr;
2009 socklen_t addrlen=sizeof(addr);
2010 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
2011 if(newsock>=0) {
2012 if(MT->numProcesses() > g_maxMThreads) {
2013 g_stats.overCapacityDrops++;
2014 try {
2015 closesocket(newsock);
2016 }
2017 catch(const PDNSException& e) {
2018 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
2019 }
2020 return;
2021 }
2022
2023 if(t_remotes)
2024 t_remotes->push_back(addr);
2025 if(t_allowFrom && !t_allowFrom->match(&addr)) {
2026 if(!g_quiet)
2027 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2028
2029 g_stats.unauthorizedTCP++;
2030 try {
2031 closesocket(newsock);
2032 }
2033 catch(const PDNSException& e) {
2034 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
2035 }
2036 return;
2037 }
2038 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
2039 g_stats.tcpClientOverflow++;
2040 try {
2041 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2042 }
2043 catch(const PDNSException& e) {
2044 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
2045 }
2046 return;
2047 }
2048
2049 setNonBlocking(newsock);
2050 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
2051 tc->state=TCPConnection::BYTE0;
2052
2053 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
2054
2055 struct timeval now;
2056 Utility::gettimeofday(&now, 0);
2057 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
2058 }
2059 }
2060
2061 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
2062 {
2063 gettimeofday(&g_now, 0);
2064 struct timeval diff = g_now - tv;
2065 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
2066
2067 if(tv.tv_sec && delta > 1000.0) {
2068 g_stats.tooOldDrops++;
2069 return 0;
2070 }
2071
2072 ++g_stats.qcounter;
2073 if(fromaddr.sin4.sin_family==AF_INET6)
2074 g_stats.ipv6qcounter++;
2075
2076 string response;
2077 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
2078 unsigned int ctag=0;
2079 uint32_t qhash = 0;
2080 bool needECS = false;
2081 bool needXPF = g_XPFAcl.match(fromaddr);
2082 std::vector<std::string> policyTags;
2083 LuaContext::LuaObject data;
2084 ComboAddress source = fromaddr;
2085 ComboAddress destination = destaddr;
2086 string requestorId;
2087 string deviceId;
2088 bool logQuery = false;
2089 #ifdef HAVE_PROTOBUF
2090 boost::uuids::uuid uniqueId;
2091 auto luaconfsLocal = g_luaconfs.getLocal();
2092 if (checkProtobufExport(luaconfsLocal)) {
2093 uniqueId = getUniqueID();
2094 needECS = true;
2095 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
2096 uniqueId = getUniqueID();
2097 }
2098 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2099 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2100 #endif
2101 EDNSSubnetOpts ednssubnet;
2102 bool ecsFound = false;
2103 bool ecsParsed = false;
2104 uint16_t ecsBegin = 0;
2105 uint16_t ecsEnd = 0;
2106 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2107 bool variable = false;
2108 try {
2109 DNSName qname;
2110 uint16_t qtype=0;
2111 uint16_t qclass=0;
2112 uint32_t age;
2113 bool qnameParsed=false;
2114 #ifdef MALLOC_TRACE
2115 /*
2116 static uint64_t last=0;
2117 if(!last)
2118 g_mtracer->clearAllocators();
2119 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2120 last=g_mtracer->getAllocs();
2121 cout<<g_mtracer->topAllocatorsString()<<endl;
2122 g_mtracer->clearAllocators();
2123 */
2124 #endif
2125
2126 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
2127 try {
2128 EDNSOptionViewMap ednsOptions;
2129 bool xpfFound = false;
2130
2131 ecsFound = false;
2132
2133 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2134 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2135 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2136
2137 qnameParsed = true;
2138 ecsParsed = true;
2139
2140 if(t_pdl) {
2141 try {
2142 if (t_pdl->d_gettag_ffi) {
2143 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
2144 }
2145 else if (t_pdl->d_gettag) {
2146 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2147 }
2148 }
2149 catch(const std::exception& e) {
2150 if(g_logCommonErrors)
2151 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2152 }
2153 }
2154 }
2155 catch(const std::exception& e)
2156 {
2157 if(g_logCommonErrors)
2158 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2159 }
2160 }
2161
2162 bool cacheHit = false;
2163 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
2164 #ifdef HAVE_PROTOBUF
2165 if (t_protobufServers) {
2166 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
2167 pbMessage->setServerIdentity(SyncRes::s_serverID);
2168 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
2169 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
2170 }
2171 }
2172 #endif /* HAVE_PROTOBUF */
2173
2174 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2175 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2176 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2177 vState valState;
2178 if (qnameParsed) {
2179 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2180 }
2181 else {
2182 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2183 }
2184
2185 if (cacheHit) {
2186 if(valState == Bogus) {
2187 if(t_bogusremotes)
2188 t_bogusremotes->push_back(source);
2189 if(t_bogusqueryring)
2190 t_bogusqueryring->push_back(make_pair(qname, qtype));
2191 }
2192
2193 #ifdef HAVE_PROTOBUF
2194 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
2195 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2196 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
2197 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2198 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2199 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2200 pbMessage->setRequestorId(requestorId);
2201 pbMessage->setDeviceId(deviceId);
2202 protobufLogResponse(*pbMessage);
2203 }
2204 #endif /* HAVE_PROTOBUF */
2205 if(!g_quiet)
2206 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
2207
2208 g_stats.packetCacheHits++;
2209 SyncRes::s_queries++;
2210 ageDNSPacket(response, age);
2211 struct msghdr msgh;
2212 struct iovec iov;
2213 char cbuf[256];
2214 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2215 msgh.msg_control=NULL;
2216
2217 if(g_fromtosockets.count(fd)) {
2218 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
2219 }
2220 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
2221 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
2222
2223 if(response.length() >= sizeof(struct dnsheader)) {
2224 struct dnsheader tmpdh;
2225 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
2226 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
2227 }
2228 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
2229 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
2230 return 0;
2231 }
2232 }
2233 catch(std::exception& e) {
2234 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
2235 return 0;
2236 }
2237
2238 if(t_pdl) {
2239 if(t_pdl->ipfilter(source, destination, *dh)) {
2240 if(!g_quiet)
2241 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2242 g_stats.policyDrops++;
2243 return 0;
2244 }
2245 }
2246
2247 if(MT->numProcesses() > g_maxMThreads) {
2248 if(!g_quiet)
2249 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
2250
2251 g_stats.overCapacityDrops++;
2252 return 0;
2253 }
2254
2255 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
2256 dc->setSocket(fd);
2257 dc->d_tag=ctag;
2258 dc->d_qhash=qhash;
2259 dc->setRemote(fromaddr);
2260 dc->setSource(source);
2261 dc->setLocal(destaddr);
2262 dc->setDestination(destination);
2263 dc->d_tcp=false;
2264 dc->d_ecsFound = ecsFound;
2265 dc->d_ecsParsed = ecsParsed;
2266 dc->d_ecsBegin = ecsBegin;
2267 dc->d_ecsEnd = ecsEnd;
2268 dc->d_ednssubnet = ednssubnet;
2269 dc->d_ttlCap = ttlCap;
2270 dc->d_variable = variable;
2271 #ifdef HAVE_PROTOBUF
2272 if (t_protobufServers || t_outgoingProtobufServers) {
2273 dc->d_uuid = std::move(uniqueId);
2274 }
2275 dc->d_requestorId = requestorId;
2276 dc->d_deviceId = deviceId;
2277 #endif
2278
2279 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
2280 return 0;
2281 }
2282
2283
2284 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
2285 {
2286 ssize_t len;
2287 static const size_t maxIncomingQuerySize = 512;
2288 static thread_local std::string data;
2289 ComboAddress fromaddr;
2290 struct msghdr msgh;
2291 struct iovec iov;
2292 char cbuf[256];
2293 bool firstQuery = true;
2294
2295 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2296 data.resize(maxIncomingQuerySize);
2297 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2298 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
2299
2300 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
2301
2302 firstQuery = false;
2303
2304 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2305 g_stats.ignoredCount++;
2306 if (!g_quiet) {
2307 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2308 }
2309 return;
2310 }
2311
2312 if (msgh.msg_flags & MSG_TRUNC) {
2313 g_stats.truncatedDrops++;
2314 if (!g_quiet) {
2315 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2316 }
2317 return;
2318 }
2319
2320 if(t_remotes) {
2321 t_remotes->push_back(fromaddr);
2322 }
2323
2324 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2325 if(!g_quiet) {
2326 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2327 }
2328
2329 g_stats.unauthorizedUDP++;
2330 return;
2331 }
2332 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2333 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2334 if(!g_quiet) {
2335 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2336 }
2337
2338 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2339 return;
2340 }
2341
2342 try {
2343 data.resize(static_cast<size_t>(len));
2344 dnsheader* dh=(dnsheader*)&data[0];
2345
2346 if(dh->qr) {
2347 g_stats.ignoredCount++;
2348 if(g_logCommonErrors) {
2349 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2350 }
2351 }
2352 else if(dh->opcode) {
2353 g_stats.ignoredCount++;
2354 if(g_logCommonErrors) {
2355 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2356 }
2357 }
2358 else if (dh->qdcount == 0) {
2359 g_stats.emptyQueriesCount++;
2360 if(g_logCommonErrors) {
2361 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2362 }
2363 }
2364 else {
2365 struct timeval tv={0,0};
2366 HarvestTimestamp(&msgh, &tv);
2367 ComboAddress dest;
2368 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2369 auto loc = rplookup(g_listenSocketsAddresses, fd);
2370 if(HarvestDestinationAddress(&msgh, &dest)) {
2371 // but.. need to get port too
2372 if(loc) {
2373 dest.sin4.sin_port = loc->sin4.sin_port;
2374 }
2375 }
2376 else {
2377 if(loc) {
2378 dest = *loc;
2379 }
2380 else {
2381 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2382 socklen_t slen = dest.getSocklen();
2383 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2384 }
2385 }
2386
2387 if(g_weDistributeQueries) {
2388 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2389 }
2390 else {
2391 ++s_threadInfos[t_id].numberOfDistributedQueries;
2392 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
2393 }
2394 }
2395 }
2396 catch(const MOADNSException &mde) {
2397 g_stats.clientParseError++;
2398 if(g_logCommonErrors) {
2399 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2400 }
2401 }
2402 catch(const std::runtime_error& e) {
2403 g_stats.clientParseError++;
2404 if(g_logCommonErrors) {
2405 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2406 }
2407 }
2408 }
2409 else {
2410 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2411 if(firstQuery && errno == EAGAIN) {
2412 g_stats.noPacketError++;
2413 }
2414
2415 break;
2416 }
2417 }
2418 }
2419
2420 static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
2421 {
2422 int fd;
2423 vector<string>locals;
2424 stringtok(locals,::arg()["local-address"]," ,");
2425
2426 if(locals.empty())
2427 throw PDNSException("No local address specified");
2428
2429 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2430 ServiceTuple st;
2431 st.port=::arg().asNum("local-port");
2432 parseService(*i, st);
2433
2434 ComboAddress sin;
2435
2436 sin.reset();
2437 sin.sin4.sin_family = AF_INET;
2438 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2439 sin.sin6.sin6_family = AF_INET6;
2440 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2441 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
2442 }
2443
2444 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
2445 if(fd<0)
2446 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2447
2448 setCloseOnExec(fd);
2449
2450 int tmp=1;
2451 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
2452 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
2453 exit(1);
2454 }
2455 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
2456 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2457 }
2458
2459 #ifdef TCP_DEFER_ACCEPT
2460 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
2461 if(i==locals.begin())
2462 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
2463 }
2464 #endif
2465
2466 if( ::arg().mustDo("non-local-bind") )
2467 Utility::setBindAny(AF_INET, fd);
2468
2469 #ifdef SO_REUSEPORT
2470 if(g_reusePort) {
2471 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2472 throw PDNSException("SO_REUSEPORT: "+stringerror());
2473 }
2474 #endif
2475
2476 if (::arg().asNum("tcp-fast-open") > 0) {
2477 #ifdef TCP_FASTOPEN
2478 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2479 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
2480 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
2481 }
2482 #else
2483 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
2484 #endif
2485 }
2486
2487 sin.sin4.sin_port = htons(st.port);
2488 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
2489 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
2490 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
2491
2492 setNonBlocking(fd);
2493 setSocketSendBuffer(fd, 65000);
2494 listen(fd, 128);
2495 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
2496 tcpSockets.insert(fd);
2497
2498 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2499 // - fd is not that which we know here, but returned from accept()
2500 if(sin.sin4.sin_family == AF_INET)
2501 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
2502 else
2503 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2504 }
2505 }
2506
2507 static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
2508 {
2509 int one=1;
2510 vector<string>locals;
2511 stringtok(locals,::arg()["local-address"]," ,");
2512
2513 if(locals.empty())
2514 throw PDNSException("No local address specified");
2515
2516 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2517 ServiceTuple st;
2518 st.port=::arg().asNum("local-port");
2519 parseService(*i, st);
2520
2521 ComboAddress sin;
2522
2523 sin.reset();
2524 sin.sin4.sin_family = AF_INET;
2525 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2526 sin.sin6.sin6_family = AF_INET6;
2527 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2528 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2529 }
2530
2531 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2532 if(fd < 0) {
2533 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2534 }
2535 if (!setSocketTimestamps(fd))
2536 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2537
2538 if(IsAnyAddress(sin)) {
2539 if(sin.sin4.sin_family == AF_INET)
2540 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2541 g_fromtosockets.insert(fd);
2542 #ifdef IPV6_RECVPKTINFO
2543 if(sin.sin4.sin_family == AF_INET6)
2544 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2545 g_fromtosockets.insert(fd);
2546 #endif
2547 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2548 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2549 }
2550 }
2551 if( ::arg().mustDo("non-local-bind") )
2552 Utility::setBindAny(AF_INET6, fd);
2553
2554 setCloseOnExec(fd);
2555
2556 setSocketReceiveBuffer(fd, 250000);
2557 sin.sin4.sin_port = htons(st.port);
2558
2559
2560 #ifdef SO_REUSEPORT
2561 if(g_reusePort) {
2562 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2563 throw PDNSException("SO_REUSEPORT: "+stringerror());
2564 }
2565 #endif
2566 socklen_t socklen=sin.getSocklen();
2567 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2568 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2569
2570 setNonBlocking(fd);
2571
2572 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
2573 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2574 if(sin.sin4.sin_family == AF_INET)
2575 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2576 else
2577 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2578 }
2579 }
2580
2581 static void daemonize(void)
2582 {
2583 if(fork())
2584 exit(0); // bye bye
2585
2586 setsid();
2587
2588 int i=open("/dev/null",O_RDWR); /* open stdin */
2589 if(i < 0)
2590 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2591 else {
2592 dup2(i,0); /* stdin */
2593 dup2(i,1); /* stderr */
2594 dup2(i,2); /* stderr */
2595 close(i);
2596 }
2597 }
2598
2599 static void usr1Handler(int)
2600 {
2601 statsWanted=true;
2602 }
2603
2604 static void usr2Handler(int)
2605 {
2606 g_quiet= !g_quiet;
2607 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2608 ::arg().set("quiet")=g_quiet ? "" : "no";
2609 }
2610
2611 static void doStats(void)
2612 {
2613 static time_t lastOutputTime;
2614 static uint64_t lastQueryCount;
2615
2616 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2617 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2618
2619 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2620 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2621 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2622 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2623 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2624
2625 g_log<<Logger::Notice<<"stats: throttle map: "
2626 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2627 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2628 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2629 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2630 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2631 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2632 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2633
2634 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2635 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2636
2637 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2638 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2639
2640 size_t idx = 0;
2641 for (const auto& threadInfo : s_threadInfos) {
2642 if(threadInfo.isWorker) {
2643 g_log<<Logger::Notice<<"Thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
2644 ++idx;
2645 }
2646 }
2647
2648 time_t now = time(0);
2649 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2650 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2651 }
2652 lastOutputTime = now;
2653 lastQueryCount = SyncRes::s_queries;
2654 }
2655 else if(statsWanted)
2656 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
2657
2658 statsWanted=false;
2659 }
2660
2661 static void houseKeeping(void *)
2662 {
2663 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
2664 static thread_local int cleanCounter=0;
2665 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2666 auto luaconfsLocal = g_luaconfs.getLocal();
2667
2668 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2669 // Loading the Lua config file already "refreshed" the TAs
2670 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2671 }
2672
2673 try {
2674 if(s_running)
2675 return;
2676 s_running=true;
2677
2678 struct timeval now;
2679 Utility::gettimeofday(&now, 0);
2680
2681 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2682 DTime dt;
2683 dt.setTimeval(now);
2684 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2685 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2686
2687 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2688
2689 if(!((cleanCounter++)%40)) { // this is a full scan!
2690 time_t limit=now.tv_sec-300;
2691 SyncRes::pruneNSSpeeds(limit);
2692 }
2693 last_prune=time(0);
2694 }
2695
2696 if(now.tv_sec - last_rootupdate > 7200) {
2697 int res = SyncRes::getRootNS(g_now, nullptr);
2698 if (!res)
2699 last_rootupdate=now.tv_sec;
2700 }
2701
2702 if(isHandlerThread()) {
2703
2704 if(now.tv_sec - last_secpoll >= 3600) {
2705 try {
2706 doSecPoll(&last_secpoll);
2707 }
2708 catch(std::exception& e)
2709 {
2710 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2711 }
2712 catch(PDNSException& e)
2713 {
2714 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2715 }
2716 catch(ImmediateServFailException &e)
2717 {
2718 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2719 }
2720 catch(...)
2721 {
2722 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
2723 }
2724 }
2725
2726 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2727 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2728 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2729 try {
2730 map<DNSName, dsmap_t> dsAnchors;
2731 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2732 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2733 lci.dsAnchors = dsAnchors;
2734 });
2735 }
2736 last_trustAnchorUpdate = now.tv_sec;
2737 } catch (const PDNSException &pe) {
2738 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2739 }
2740 }
2741 s_running=false;
2742 }
2743 }
2744 catch(PDNSException& ae)
2745 {
2746 s_running=false;
2747 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2748 throw;
2749 }
2750 }
2751
2752 static void makeThreadPipes()
2753 {
2754 /* thread 0 is the handler / SNMP, we start at 1 */
2755 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2756 auto& threadInfos = s_threadInfos.at(n);
2757
2758 int fd[2];
2759 if(pipe(fd) < 0)
2760 unixDie("Creating pipe for inter-thread communications");
2761
2762 threadInfos.pipes.readToThread = fd[0];
2763 threadInfos.pipes.writeToThread = fd[1];
2764
2765 if(pipe(fd) < 0)
2766 unixDie("Creating pipe for inter-thread communications");
2767
2768 threadInfos.pipes.readFromThread = fd[0];
2769 threadInfos.pipes.writeFromThread = fd[1];
2770
2771 if(pipe(fd) < 0)
2772 unixDie("Creating pipe for inter-thread communications");
2773
2774 threadInfos.pipes.readQueriesToThread = fd[0];
2775 threadInfos.pipes.writeQueriesToThread = fd[1];
2776
2777 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
2778 unixDie("Making pipe for inter-thread communications non-blocking");
2779 }
2780 }
2781 }
2782
2783 struct ThreadMSG
2784 {
2785 pipefunc_t func;
2786 bool wantAnswer;
2787 };
2788
2789 void broadcastFunction(const pipefunc_t& func)
2790 {
2791 /* This function might be called by the worker with t_id 0 during startup
2792 for the initialization of ACLs and domain maps. After that it should only
2793 be called by the handler. */
2794
2795 if (s_threadInfos.empty() && isHandlerThread()) {
2796 /* the handler and distributors will call themselves below, but
2797 during startup we get called while s_threadInfos has not been
2798 populated yet to update the ACL or domain maps, so we need to
2799 handle that case.
2800 */
2801 func();
2802 }
2803
2804 unsigned int n = 0;
2805 for (const auto& threadInfo : s_threadInfos) {
2806 if(n++ == t_id) {
2807 func(); // don't write to ourselves!
2808 continue;
2809 }
2810
2811 ThreadMSG* tmsg = new ThreadMSG();
2812 tmsg->func = func;
2813 tmsg->wantAnswer = true;
2814 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2815 delete tmsg;
2816
2817 unixDie("write to thread pipe returned wrong size or error");
2818 }
2819
2820 string* resp = nullptr;
2821 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2822 unixDie("read from thread pipe returned wrong size or error");
2823
2824 if(resp) {
2825 delete resp;
2826 resp = nullptr;
2827 }
2828 }
2829 }
2830
2831 static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
2832 {
2833 auto& targetInfo = s_threadInfos[target];
2834 if(!targetInfo.isWorker) {
2835 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
2836 exit(1);
2837 }
2838
2839 const auto& tps = targetInfo.pipes;
2840
2841 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2842 if (written > 0) {
2843 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2844 delete tmsg;
2845 unixDie("write to thread pipe returned wrong size or error");
2846 }
2847 }
2848 else {
2849 int error = errno;
2850 if (error == EAGAIN || error == EWOULDBLOCK) {
2851 return false;
2852 } else {
2853 delete tmsg;
2854 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
2855 }
2856 }
2857
2858 ++targetInfo.numberOfDistributedQueries;
2859
2860 return true;
2861 }
2862
2863 static unsigned int getWorkerLoad(size_t workerIdx)
2864 {
2865 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
2866 if (mt != nullptr) {
2867 return mt->numProcesses();
2868 }
2869 return 0;
2870 }
2871
2872 static unsigned int selectWorker(unsigned int hash)
2873 {
2874 if (s_balancingFactor == 0) {
2875 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
2876 }
2877
2878 /* we start with one, representing the query we are currently handling */
2879 double currentLoad = 1;
2880 std::vector<unsigned int> load(g_numWorkerThreads);
2881 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
2882 load[idx] = getWorkerLoad(idx);
2883 currentLoad += load[idx];
2884 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
2885 }
2886
2887 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
2888 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
2889
2890 unsigned int worker = hash % g_numWorkerThreads;
2891 /* at least one server has to be below the average load */
2892 if (load[worker] > targetLoad) {
2893 ++g_stats.rebalancedQueries;
2894 do {
2895 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
2896 worker = (worker + 1) % g_numWorkerThreads;
2897 }
2898 while(load[worker] > targetLoad);
2899 }
2900
2901 return /* skip handler */ 1 + g_numDistributorThreads + worker;
2902 }
2903
2904 // This function is only called by the distributor threads, when pdns-distributes-queries is set
2905 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2906 {
2907 if (!isDistributorThread()) {
2908 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2909 exit(1);
2910 }
2911
2912 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2913 unsigned int target = selectWorker(hash);
2914
2915 ThreadMSG* tmsg = new ThreadMSG();
2916 tmsg->func = func;
2917 tmsg->wantAnswer = false;
2918
2919 if (!trySendingQueryToWorker(target, tmsg)) {
2920 /* if this function failed but did not raise an exception, it means that the pipe
2921 was full, let's try another one */
2922 unsigned int newTarget = 0;
2923 do {
2924 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
2925 } while (newTarget == target);
2926
2927 if (!trySendingQueryToWorker(newTarget, tmsg)) {
2928 g_stats.queryPipeFullDrops++;
2929 delete tmsg;
2930 }
2931 }
2932 }
2933
2934 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2935 {
2936 ThreadMSG* tmsg = nullptr;
2937
2938 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
2939 unixDie("read from thread pipe returned wrong size or error");
2940 }
2941
2942 void *resp=0;
2943 try {
2944 resp = tmsg->func();
2945 }
2946 catch(std::exception& e) {
2947 if(g_logCommonErrors)
2948 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2949 }
2950 catch(PDNSException& e) {
2951 if(g_logCommonErrors)
2952 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2953 }
2954 if(tmsg->wantAnswer) {
2955 const auto& threadInfo = s_threadInfos.at(t_id);
2956 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2957 delete tmsg;
2958 unixDie("write to thread pipe returned wrong size or error");
2959 }
2960 }
2961
2962 delete tmsg;
2963 }
2964
2965 template<class T> void *voider(const boost::function<T*()>& func)
2966 {
2967 return func();
2968 }
2969
2970 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2971 {
2972 a.insert(a.end(), b.begin(), b.end());
2973 return a;
2974 }
2975
/* Appends all the (string, uint16_t) pairs of b to a, and returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2981
2982 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2983 {
2984 a.insert(a.end(), b.begin(), b.end());
2985 return a;
2986 }
2987
2988
2989 /*
2990 This function should only be called by the handler to gather metrics, wipe the cache,
2991 reload the Lua script (not the Lua config) or change the current trace regex,
2992 and by the SNMP thread to gather metrics. */
2993 template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
2994 {
2995 if (!isHandlerThread()) {
2996 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
2997 exit(1);
2998 }
2999
3000 unsigned int n = 0;
3001 T ret=T();
3002 for (const auto& threadInfo : s_threadInfos) {
3003 if (n++ == t_id) {
3004 continue;
3005 }
3006
3007 const auto& tps = threadInfo.pipes;
3008 ThreadMSG* tmsg = new ThreadMSG();
3009 tmsg->func = boost::bind(voider<T>, func);
3010 tmsg->wantAnswer = true;
3011
3012 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3013 delete tmsg;
3014 unixDie("write to thread pipe returned wrong size or error");
3015 }
3016
3017 T* resp = nullptr;
3018 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3019 unixDie("read from thread pipe returned wrong size or error");
3020
3021 if(resp) {
3022 ret += *resp;
3023 delete resp;
3024 resp = nullptr;
3025 }
3026 }
3027 return ret;
3028 }
3029
3030 template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3031 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3032 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3033 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3034
3035 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
3036 {
3037 try {
3038 string remote;
3039 string msg=s_rcc.recv(&remote);
3040 RecursorControlParser rcp;
3041 RecursorControlParser::func_t* command;
3042
3043 string answer=rcp.getAnswer(msg, &command);
3044
3045 // If we are inside a chroot, we need to strip
3046 if (!arg()["chroot"].empty()) {
3047 size_t len = arg()["chroot"].length();
3048 remote = remote.substr(len);
3049 }
3050
3051 s_rcc.send(answer, &remote);
3052 command();
3053 }
3054 catch(const std::exception& e) {
3055 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
3056 }
3057 catch(const PDNSException& ae) {
3058 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
3059 }
3060 }
3061
3062 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
3063 {
3064 PacketID* pident=any_cast<PacketID>(&var);
3065 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3066
3067 shared_array<char> buffer(new char[pident->inNeeded]);
3068
3069 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
3070 if(ret > 0) {
3071 pident->inMSG.append(&buffer[0], &buffer[ret]);
3072 pident->inNeeded-=(size_t)ret;
3073 if(!pident->inNeeded || pident->inIncompleteOkay) {
3074 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3075 PacketID pid=*pident;
3076 string msg=pident->inMSG;
3077
3078 t_fdm->removeReadFD(fd);
3079 MT->sendEvent(pid, &msg);
3080 }
3081 else {
3082 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3083 }
3084 }
3085 else {
3086 PacketID tmp=*pident;
3087 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
3088 string empty;
3089 MT->sendEvent(tmp, &empty); // this conveys error status
3090 }
3091 }
3092
3093 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
3094 {
3095 PacketID* pid=any_cast<PacketID>(&var);
3096 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
3097 if(ret > 0) {
3098 pid->outPos+=(ssize_t)ret;
3099 if(pid->outPos==pid->outMSG.size()) {
3100 PacketID tmp=*pid;
3101 t_fdm->removeWriteFD(fd);
3102 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3103 }
3104 }
3105 else { // error or EOF
3106 PacketID tmp(*pid);
3107 t_fdm->removeWriteFD(fd);
3108 string sent;
3109 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
3110 }
3111 }
3112
3113 // resend event to everybody chained onto it
3114 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
3115 {
3116 if(iter->key.chain.empty())
3117 return;
3118 // cerr<<"doResends called!\n";
3119 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3120 resend.fd=-1;
3121 resend.id=*i;
3122 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3123
3124 MT->sendEvent(resend, &content);
3125 g_stats.chainResends++;
3126 }
3127 }
3128
3129 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
3130 {
3131 PacketID pid=any_cast<PacketID>(var);
3132 ssize_t len;
3133 std::string packet;
3134 packet.resize(g_outgoingEDNSBufsize);
3135 ComboAddress fromaddr;
3136 socklen_t addrlen=sizeof(fromaddr);
3137
3138 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
3139
3140 if(len < (ssize_t) sizeof(dnsheader)) {
3141 if(len < 0)
3142 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
3143 else {
3144 g_stats.serverParseError++;
3145 if(g_logCommonErrors)
3146 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
3147 ": packet smaller than DNS header"<<endl;
3148 }
3149
3150 t_udpclientsocks->returnSocket(fd);
3151 string empty;
3152
3153 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3154 if(iter != MT->d_waiters.end())
3155 doResends(iter, pid, empty);
3156
3157 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
3158 return;
3159 }
3160
3161 packet.resize(len);
3162 dnsheader dh;
3163 memcpy(&dh, &packet.at(0), sizeof(dh));
3164
3165 PacketID pident;
3166 pident.remote=fromaddr;
3167 pident.id=dh.id;
3168 pident.fd=fd;
3169
3170 if(!dh.qr && g_logCommonErrors) {
3171 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
3172 }
3173
3174 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3175 !dh.qr) { // one weird server
3176 pident.domain.clear();
3177 pident.type = 0;
3178 }
3179 else {
3180 try {
3181 if(len > 12)
3182 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
3183 }
3184 catch(std::exception& e) {
3185 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
3186 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
3187 return;
3188 }
3189 }
3190
3191 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3192 if(iter != MT->d_waiters.end()) {
3193 doResends(iter, pident, packet);
3194 }
3195
3196 retryWithName:
3197
3198 if(!MT->sendEvent(pident, &packet)) {
3199 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3200 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3201 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
3202 pident.domain == mthread->key.domain) {
3203 mthread->key.nearMisses++;
3204 }
3205
3206 // be a bit paranoid here since we're weakening our matching
3207 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
3208 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3209 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3210 pident.domain = mthread->key.domain;
3211 pident.type = mthread->key.type;
3212 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
3213 }
3214 }
3215 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3216 if(g_logCommonErrors) {
3217 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
3218 }
3219 }
3220 else if(fd >= 0) {
3221 t_udpclientsocks->returnSocket(fd);
3222 }
3223 }
3224
3225 FDMultiplexer* getMultiplexer()
3226 {
3227 FDMultiplexer* ret;
3228 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
3229 try {
3230 ret=i.second();
3231 return ret;
3232 }
3233 catch(FDMultiplexerException &fe) {
3234 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
3235 }
3236 catch(...) {
3237 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
3238 }
3239 }
3240 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
3241 exit(1);
3242 }
3243
3244
3245 static string* doReloadLuaScript()
3246 {
3247 string fname= ::arg()["lua-dns-script"];
3248 try {
3249 if(fname.empty()) {
3250 t_pdl.reset();
3251 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
3252 return new string("unloaded\n");
3253 }
3254 else {
3255 t_pdl = std::make_shared<RecursorLua4>();
3256 t_pdl->loadFile(fname);
3257 }
3258 }
3259 catch(std::exception& e) {
3260 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
3261 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
3262 }
3263
3264 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
3265 return new string("(re)loaded '"+fname+"'\n");
3266 }
3267
3268 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3269 {
3270 if(begin != end)
3271 ::arg().set("lua-dns-script") = *begin;
3272
3273 return broadcastAccFunction<string>(doReloadLuaScript);
3274 }
3275
3276 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
3277 try
3278 {
3279 if(newRegex.empty()) {
3280 t_traceRegex.reset();
3281 return new string("unset\n");
3282 }
3283 else {
3284 t_traceRegex = std::make_shared<Regex>(newRegex);
3285 return new string("ok\n");
3286 }
3287 }
3288 catch(PDNSException& ae)
3289 {
3290 return new string(ae.reason+"\n");
3291 }
3292
3293 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3294 {
3295 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3296 }
3297
3298 static void checkLinuxIPv6Limits()
3299 {
3300 #ifdef __linux__
3301 string line;
3302 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
3303 int lim=std::stoi(line);
3304 if(lim < 16384) {
3305 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
3306 }
3307 }
3308 #endif
3309 }
3310 static void checkOrFixFDS()
3311 {
3312 unsigned int availFDs=getFilenumLimit();
3313 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3314
3315 if(wantFDs > availFDs) {
3316 unsigned int hardlimit= getFilenumLimit(true);
3317 if(hardlimit >= wantFDs) {
3318 setFilenumLimit(wantFDs);
3319 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
3320 }
3321 else {
3322 int newval = (hardlimit - 25) / g_numWorkerThreads;
3323 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
3324 g_maxMThreads = newval;
3325 setFilenumLimit(hardlimit);
3326 }
3327 }
3328 }
3329
3330 static void* recursorThread(unsigned int tid, const string& threadName);
3331
3332 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
3333 {
3334 t_allowFrom = ng;
3335 return nullptr;
3336 }
3337
3338 int g_argc;
3339 char** g_argv;
3340
3341 void parseACLs()
3342 {
3343 static bool l_initialized;
3344
3345 if(l_initialized) { // only reload configuration file on second call
3346 string configname=::arg()["config-dir"]+"/recursor.conf";
3347 if(::arg()["config-name"]!="") {
3348 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3349 }
3350 cleanSlashes(configname);
3351
3352 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
3353 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
3354 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
3355 ::arg().preParseFile(configname.c_str(), "include-dir");
3356 ::arg().preParse(g_argc, g_argv, "include-dir");
3357
3358 // then process includes
3359 std::vector<std::string> extraConfigs;
3360 ::arg().gatherIncludes(extraConfigs);
3361
3362 for(const std::string& fn : extraConfigs) {
3363 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3364 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3365 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3366 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3367 }
3368
3369 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3370 ::arg().preParse(g_argc, g_argv, "allow-from");
3371 }
3372
3373 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3374 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3375
3376 if(!::arg()["allow-from-file"].empty()) {
3377 string line;
3378 ifstream ifs(::arg()["allow-from-file"].c_str());
3379 if(!ifs) {
3380 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3381 }
3382
3383 string::size_type pos;
3384 while(getline(ifs,line)) {
3385 pos=line.find('#');
3386 if(pos!=string::npos)
3387 line.resize(pos);
3388 trim(line);
3389 if(line.empty())
3390 continue;
3391
3392 allowFrom->addMask(line);
3393 }
3394 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
3395 }
3396 else if(!::arg()["allow-from"].empty()) {
3397 vector<string> ips;
3398 stringtok(ips, ::arg()["allow-from"], ", ");
3399
3400 g_log<<Logger::Warning<<"Only allowing queries from: ";
3401 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3402 allowFrom->addMask(*i);
3403 if(i!=ips.begin())
3404 g_log<<Logger::Warning<<", ";
3405 g_log<<Logger::Warning<<*i;
3406 }
3407 g_log<<Logger::Warning<<endl;
3408 }
3409 else {
3410 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3411 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
3412 allowFrom = nullptr;
3413 }
3414
3415 g_initialAllowFrom = allowFrom;
3416 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
3417 oldAllowFrom = nullptr;
3418
3419 l_initialized = true;
3420 }
3421
3422
3423 static void setupDelegationOnly()
3424 {
3425 vector<string> parts;
3426 stringtok(parts, ::arg()["delegation-only"], ", \t");
3427 for(const auto& p : parts) {
3428 SyncRes::addDelegationOnly(DNSName(p));
3429 }
3430 }
3431
3432 static std::map<unsigned int, std::set<int> > parseCPUMap()
3433 {
3434 std::map<unsigned int, std::set<int> > result;
3435
3436 const std::string value = ::arg()["cpu-map"];
3437
3438 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
3439 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
3440 return result;
3441 }
3442
3443 std::vector<std::string> parts;
3444
3445 stringtok(parts, value, " \t");
3446
3447 for(const auto& part : parts) {
3448 if (part.find('=') == string::npos)
3449 continue;
3450
3451 try {
3452 auto headers = splitField(part, '=');
3453 trim(headers.first);
3454 trim(headers.second);
3455
3456 unsigned int threadId = pdns_stou(headers.first);
3457 std::vector<std::string> cpus;
3458
3459 stringtok(cpus, headers.second, ",");
3460
3461 for(const auto& cpu : cpus) {
3462 int cpuId = std::stoi(cpu);
3463
3464 result[threadId].insert(cpuId);
3465 }
3466 }
3467 catch(const std::exception& e) {
3468 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
3469 }
3470 }
3471
3472 return result;
3473 }
3474
3475 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3476 {
3477 const auto& cpuMapping = cpusMap.find(n);
3478 if (cpuMapping != cpusMap.cend()) {
3479 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3480 if (rc == 0) {
3481 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
3482 for (const auto cpu : cpuMapping->second) {
3483 g_log<<Logger::Info<<" "<<cpu;
3484 }
3485 g_log<<Logger::Info<<endl;
3486 }
3487 else {
3488 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
3489 for (const auto cpu : cpuMapping->second) {
3490 g_log<<Logger::Info<<" "<<cpu;
3491 }
3492 g_log<<Logger::Info<<strerror(rc)<<endl;
3493 }
3494 }
3495 }
3496
3497 #ifdef NOD_ENABLED
3498 static void setupNODThread()
3499 {
3500 if (g_nodEnabled) {
3501 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3502 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
3503 try {
3504 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3505 }
3506 catch (const PDNSException& e) {
3507 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3508 _exit(1);
3509 }
3510 if (!t_nodDBp->init()) {
3511 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3512 _exit(1);
3513 }
3514 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
3515 t.detach();
3516 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
3517 }
3518 if (g_udrEnabled) {
3519 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3520 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
3521 try {
3522 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3523 }
3524 catch (const PDNSException& e) {
3525 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3526 _exit(1);
3527 }
3528 if (!t_udrDBp->init()) {
3529 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3530 _exit(1);
3531 }
3532 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
3533 t.detach();
3534 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
3535 }
3536 }
3537
3538 void parseNODWhitelist(const std::string& wlist)
3539 {
3540 vector<string> parts;
3541 stringtok(parts, wlist, ",; ");
3542 for(const auto& a : parts) {
3543 g_nodDomainWL.add(DNSName(a));
3544 }
3545 }
3546
3547 static void setupNODGlobal()
3548 {
3549 // Setup NOD subsystem
3550 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3551 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3552 g_nodLog = ::arg().mustDo("new-domain-log");
3553 parseNODWhitelist(::arg()["new-domain-whitelist"]);
3554
3555 // Setup Unique DNS Response subsystem
3556 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3557 g_udrLog = ::arg().mustDo("unique-response-log");
3558 }
3559 #endif /* NOD_ENABLED */
3560
3561 static int serviceMain(int argc, char*argv[])
3562 {
3563 g_log.setName(s_programname);
3564 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3565 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
3566
3567 if(!::arg()["logging-facility"].empty()) {
3568 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3569 if(val >= 0)
3570 g_log.setFacility(val);
3571 else
3572 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
3573 }
3574
3575 showProductVersion();
3576
3577 g_disthashseed=dns_random(0xffffffff);
3578
3579 checkLinuxIPv6Limits();
3580 try {
3581 vector<string> addrs;
3582 if(!::arg()["query-local-address6"].empty()) {
3583 SyncRes::s_doIPv6=true;
3584 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3585
3586 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3587 for(const string& addr : addrs) {
3588 g_localQueryAddresses6.push_back(ComboAddress(addr));
3589 }
3590 }
3591 else {
3592 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
3593 }
3594 addrs.clear();
3595 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3596 for(const string& addr : addrs) {
3597 g_localQueryAddresses4.push_back(ComboAddress(addr));
3598 }
3599 }
3600 catch(std::exception& e) {
3601 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
3602 exit(99);
3603 }
3604
3605 // keep this ABOVE loadRecursorLuaConfig!
3606 if(::arg()["dnssec"]=="off")
3607 g_dnssecmode=DNSSECMode::Off;
3608 else if(::arg()["dnssec"]=="process-no-validate")
3609 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3610 else if(::arg()["dnssec"]=="process")
3611 g_dnssecmode=DNSSECMode::Process;
3612 else if(::arg()["dnssec"]=="validate")
3613 g_dnssecmode=DNSSECMode::ValidateAll;
3614 else if(::arg()["dnssec"]=="log-fail")
3615 g_dnssecmode=DNSSECMode::ValidateForLog;
3616 else {
3617 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
3618 exit(1);
3619 }
3620
3621 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3622 if (g_signatureInceptionSkew < 0) {
3623 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3624 exit(1);
3625 }
3626
3627 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
3628 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
3629
3630 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3631 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
3632
3633 luaConfigDelayedThreads delayedLuaThreads;
3634 try {
3635 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
3636 }
3637 catch (PDNSException &e) {
3638 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
3639 exit(1);
3640 }
3641
3642 parseACLs();
3643 initPublicSuffixList(::arg()["public-suffix-list-file"]);
3644
3645 if(!::arg()["dont-query"].empty()) {
3646 vector<string> ips;
3647 stringtok(ips, ::arg()["dont-query"], ", ");
3648 ips.push_back("0.0.0.0");
3649 ips.push_back("::");
3650
3651 g_log<<Logger::Warning<<"Will not send queries to: ";
3652 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3653 SyncRes::addDontQuery(*i);
3654 if(i!=ips.begin())
3655 g_log<<Logger::Warning<<", ";
3656 g_log<<Logger::Warning<<*i;
3657 }
3658 g_log<<Logger::Warning<<endl;
3659 }
3660
3661 g_quiet=::arg().mustDo("quiet");
3662
3663 /* this needs to be done before parseACLs(), which call broadcastFunction() */
3664 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3665 if(g_weDistributeQueries) {
3666 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
3667 }
3668
3669 setupDelegationOnly();
3670 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
3671
3672 if(::arg()["trace"]=="fail") {
3673 SyncRes::setDefaultLogMode(SyncRes::Store);
3674 }
3675 else if(::arg().mustDo("trace")) {
3676 SyncRes::setDefaultLogMode(SyncRes::Log);
3677 ::arg().set("quiet")="no";
3678 g_quiet=false;
3679 g_dnssecLOG=true;
3680 }
3681 string myHostname = getHostname();
3682 if (myHostname == "UNKNOWN"){
3683 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3684 myHostname = "";
3685 }
3686
3687 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3688 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
3689
3690 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3691
3692 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
3693 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
3694 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
3695 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
3696 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3697 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3698 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
3699 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3700 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
3701 SyncRes::s_serverID=::arg()["server-id"];
3702 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3703 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3704 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3705 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3706 if(SyncRes::s_serverID.empty()) {
3707 SyncRes::s_serverID = myHostname;
3708 }
3709
3710 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3711 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3712 SyncRes::clearECSStats();
3713
3714 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3715 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3716 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3717 }
3718 else {
3719 bool found = false;
3720 for (const auto& addr : g_localQueryAddresses4) {
3721 if (!IsAnyAddress(addr)) {
3722 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3723 found = true;
3724 break;
3725 }
3726 }
3727 if (!found) {
3728 for (const auto& addr : g_localQueryAddresses6) {
3729 if (!IsAnyAddress(addr)) {
3730 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3731 found = true;
3732 break;
3733 }
3734 }
3735 if (!found) {
3736 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3737 }
3738 }
3739 }
3740
3741 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3742 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3743 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3744
3745 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
3746 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
3747
3748 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3749
3750 g_initialDomainMap = parseAuthAndForwards();
3751
3752 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3753
3754 g_logCommonErrors=::arg().mustDo("log-common-errors");
3755 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3756
3757 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3758 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3759
3760 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3761
3762 g_numDistributorThreads = ::arg().asNum("distributor-threads");
3763 g_numWorkerThreads = ::arg().asNum("threads");
3764 if (g_numWorkerThreads < 1) {
3765 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3766 g_numWorkerThreads = 1;
3767 }
3768
3769 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
3770 g_maxMThreads = ::arg().asNum("max-mthreads");
3771
3772 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3773
3774 g_statisticsInterval = ::arg().asNum("statistics-interval");
3775
3776 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
3777
3778 #ifdef SO_REUSEPORT
3779 g_reusePort = ::arg().mustDo("reuseport");
3780 #endif
3781
3782 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
3783
3784 if (g_reusePort) {
3785 if (g_weDistributeQueries) {
3786 /* first thread is the handler, then distributors */
3787 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3788 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3789 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3790 makeUDPServerSockets(deferredAdds);
3791 makeTCPServerSockets(deferredAdds, tcpSockets);
3792 }
3793 }
3794 else {
3795 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3796 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3797 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3798 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3799 makeUDPServerSockets(deferredAdds);
3800 makeTCPServerSockets(deferredAdds, tcpSockets);
3801 }
3802 }
3803 }
3804 else {
3805 std::set<int> tcpSockets;
3806 /* we don't have reuseport so we can only open one socket per
3807 listening addr:port and everyone will listen on it */
3808 makeUDPServerSockets(g_deferredAdds);
3809 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3810
3811 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3812 needs to listen to the shared sockets */
3813 if (g_weDistributeQueries) {
3814 /* first thread is the handler, then distributors */
3815 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3816 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3817 }
3818 }
3819 else {
3820 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3821 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3822 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3823 }
3824 }
3825 }
3826
3827 #ifdef NOD_ENABLED
3828 // Setup newly observed domain globals
3829 setupNODGlobal();
3830 #endif /* NOD_ENABLED */
3831
3832 int forks;
3833 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3834 if(!fork()) // we are child
3835 break;
3836 }
3837
3838 if(::arg().mustDo("daemon")) {
3839 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3840 g_log.toConsole(Logger::Critical);
3841 daemonize();
3842 }
3843 signal(SIGUSR1,usr1Handler);
3844 signal(SIGUSR2,usr2Handler);
3845 signal(SIGPIPE,SIG_IGN);
3846
3847 checkOrFixFDS();
3848
3849 #ifdef HAVE_LIBSODIUM
3850 if (sodium_init() == -1) {
3851 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3852 exit(99);
3853 }
3854 #endif
3855
3856 openssl_thread_setup();
3857 openssl_seed();
3858 /* setup rng before chroot */
3859 dns_random_init();
3860
3861 if(::arg()["server-id"].empty()) {
3862 ::arg().set("server-id") = myHostname;
3863 }
3864
3865 int newgid=0;
3866 if(!::arg()["setgid"].empty())
3867 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3868 int newuid=0;
3869 if(!::arg()["setuid"].empty())
3870 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3871
3872 Utility::dropGroupPrivs(newuid, newgid);
3873
3874 if (!::arg()["chroot"].empty()) {
3875 #ifdef HAVE_SYSTEMD
3876 char *ns;
3877 ns = getenv("NOTIFY_SOCKET");
3878 if (ns != nullptr) {
3879 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3880 exit(1);
3881 }
3882 #endif
3883 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3884 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3885 exit(1);
3886 }
3887 else
3888 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3889 }
3890
3891 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3892 if(!s_pidfname.empty())
3893 unlink(s_pidfname.c_str()); // remove possible old pid file
3894 writePid();
3895
3896 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3897
3898 Utility::dropUserPrivs(newuid);
3899 try {
3900 /* we might still have capabilities remaining, for example if we have been started as root
3901 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
3902 like CAP_NET_BIND_SERVICE.
3903 */
3904 dropCapabilities();
3905 }
3906 catch(const std::exception& e) {
3907 g_log<<Logger::Warning<<e.what()<<endl;
3908 }
3909
3910 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3911
3912 makeThreadPipes();
3913
3914 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3915 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3916 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3917 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
3918
3919 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
3920 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
3921 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
3922 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
3923
3924 if (::arg().mustDo("snmp-agent")) {
3925 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3926 g_snmpAgent->run();
3927 }
3928
3929 int port = ::arg().asNum("udp-source-port-min");
3930 if(port < 1024 || port > 65535){
3931 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
3932 exit(99); // this isn't going to fix itself either
3933 }
3934 s_minUdpSourcePort = port;
3935 port = ::arg().asNum("udp-source-port-max");
3936 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
3937 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
3938 exit(99); // this isn't going to fix itself either
3939 }
3940 s_maxUdpSourcePort = port;
3941 std::vector<string> parts {};
3942 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
3943 for (const auto &part : parts)
3944 {
3945 port = std::stoi(part);
3946 if(port < 1024 || port > 65535){
3947 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
3948 exit(99); // this isn't going to fix itself either
3949 }
3950 s_avoidUdpSourcePorts.insert(port);
3951 }
3952
3953 unsigned int currentThreadId = 1;
3954 const auto cpusMap = parseCPUMap();
3955
3956 if(g_numThreads == 1) {
3957 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
3958 #ifdef HAVE_SYSTEMD
3959 sd_notify(0, "READY=1");
3960 #endif
3961
3962 /* This thread handles the web server, carbon, statistics and the control channel */
3963 auto& handlerInfos = s_threadInfos.at(0);
3964 handlerInfos.isHandler = true;
3965 handlerInfos.thread = std::thread(recursorThread, 0, "main");
3966
3967 setCPUMap(cpusMap, currentThreadId, pthread_self());
3968
3969 auto& infos = s_threadInfos.at(currentThreadId);
3970 infos.isListener = true;
3971 infos.isWorker = true;
3972 recursorThread(currentThreadId++, "worker");
3973 }
3974 else {
3975
3976 if (g_weDistributeQueries) {
3977 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
3978 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
3979 auto& infos = s_threadInfos.at(currentThreadId);
3980 infos.isListener = true;
3981 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
3982
3983 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3984 }
3985 }
3986
3987 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
3988
3989 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
3990 auto& infos = s_threadInfos.at(currentThreadId);
3991 infos.isListener = g_weDistributeQueries ? false : true;
3992 infos.isWorker = true;
3993 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
3994
3995 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3996 }
3997
3998 #ifdef HAVE_SYSTEMD
3999 sd_notify(0, "READY=1");
4000 #endif
4001
4002 /* This thread handles the web server, carbon, statistics and the control channel */
4003 auto& infos = s_threadInfos.at(0);
4004 infos.isHandler = true;
4005 infos.thread = std::thread(recursorThread, 0, "web+stat");
4006
4007 s_threadInfos.at(0).thread.join();
4008 }
4009 return 0;
4010 }
4011
4012 static void* recursorThread(unsigned int n, const string& threadName)
4013 try
4014 {
4015 t_id=n;
4016 auto& threadInfo = s_threadInfos.at(t_id);
4017
4018 static string threadPrefix = "pdns-r/";
4019 setThreadName(threadPrefix + threadName);
4020
4021 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
4022 SyncRes::setDomainMap(g_initialDomainMap);
4023 t_allowFrom = g_initialAllowFrom;
4024 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4025 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
4026 primeHints();
4027
4028 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
4029
4030 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
4031
4032 #ifdef NOD_ENABLED
4033 if (threadInfo.isWorker)
4034 setupNODThread();
4035 #endif /* NOD_ENABLED */
4036
4037 /* the listener threads handle TCP queries */
4038 if(threadInfo.isWorker || threadInfo.isListener) {
4039 try {
4040 if(!::arg()["lua-dns-script"].empty()) {
4041 t_pdl = std::make_shared<RecursorLua4>();
4042 t_pdl->loadFile(::arg()["lua-dns-script"]);
4043 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4044 }
4045 }
4046 catch(std::exception &e) {
4047 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4048 _exit(99);
4049 }
4050 }
4051
4052 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
4053 if(ringsize) {
4054 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4055 if(g_weDistributeQueries)
4056 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
4057 else
4058 t_remotes->set_capacity(ringsize);
4059 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4060 t_servfailremotes->set_capacity(ringsize);
4061 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4062 t_bogusremotes->set_capacity(ringsize);
4063 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4064 t_largeanswerremotes->set_capacity(ringsize);
4065 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4066 t_timeouts->set_capacity(ringsize);
4067
4068 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4069 t_queryring->set_capacity(ringsize);
4070 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4071 t_servfailqueryring->set_capacity(ringsize);
4072 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4073 t_bogusqueryring->set_capacity(ringsize);
4074 }
4075
4076 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
4077 threadInfo.mt = MT.get();
4078
4079 #ifdef HAVE_PROTOBUF
4080 /* start protobuf export threads if needed */
4081 auto luaconfsLocal = g_luaconfs.getLocal();
4082 checkProtobufExport(luaconfsLocal);
4083 checkOutgoingProtobufExport(luaconfsLocal);
4084 #endif /* HAVE_PROTOBUF */
4085
4086 PacketID pident;
4087
4088 t_fdm=getMultiplexer();
4089
4090 if(threadInfo.isHandler) {
4091 if(::arg().mustDo("webserver")) {
4092 g_log<<Logger::Warning << "Enabling web server" << endl;
4093 try {
4094 new RecursorWebServer(t_fdm);
4095 }
4096 catch(PDNSException &e) {
4097 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
4098 exit(99);
4099 }
4100 }
4101 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
4102 }
4103 else {
4104
4105 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4106 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4107
4108 if (threadInfo.isListener) {
4109 if (g_reusePort) {
4110 /* then every listener has its own FDs */
4111 for(const auto deferred : threadInfo.deferredAdds) {
4112 t_fdm->addReadFD(deferred.first, deferred.second);
4113 }
4114 }
4115 else {
4116 /* otherwise all listeners are listening on the same ones */
4117 for(const auto deferred : g_deferredAdds) {
4118 t_fdm->addReadFD(deferred.first, deferred.second);
4119 }
4120 }
4121 }
4122 }
4123
4124 registerAllStats();
4125
4126 if(threadInfo.isHandler) {
4127 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4128 }
4129
4130 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4131
4132 bool listenOnTCP(true);
4133
4134 time_t last_stat = 0;
4135 time_t last_carbon=0, last_lua_maintenance=0;
4136 time_t carbonInterval=::arg().asNum("carbon-interval");
4137 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
4138 counter.store(0); // used to periodically execute certain tasks
4139 for(;;) {
4140 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
4141
4142 if(!(counter%500)) {
4143 MT->makeThread(houseKeeping, 0);
4144 }
4145
4146 if(!(counter%55)) {
4147 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
4148 expired_t expired=t_fdm->getTimeouts(g_now);
4149
4150 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
4151 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4152 if(g_logCommonErrors)
4153 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4154 t_fdm->removeReadFD(i->first);
4155 }
4156 }
4157
4158 counter++;
4159
4160 if(threadInfo.isHandler) {
4161 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4162 doStats();
4163 last_stat = g_now.tv_sec;
4164 }
4165
4166 Utility::gettimeofday(&g_now, 0);
4167
4168 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4169 MT->makeThread(doCarbonDump, 0);
4170 last_carbon = g_now.tv_sec;
4171 }
4172 }
4173 if (t_pdl != nullptr) {
4174 // lua-dns-script directive is present, call the maintenance callback if needed
4175 /* remember that the listener threads handle TCP queries */
4176 if (threadInfo.isWorker || threadInfo.isListener) {
4177 // Only on threads processing queries
4178 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4179 t_pdl->maintenance();
4180 last_lua_maintenance = g_now.tv_sec;
4181 }
4182 }
4183 }
4184
4185 t_fdm->run(&g_now);
4186 // 'run' updates g_now for us
4187
4188 if(threadInfo.isListener) {
4189 if(listenOnTCP) {
4190 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4191 for(const auto fd : threadInfo.tcpSockets) {
4192 t_fdm->removeReadFD(fd);
4193 }
4194 listenOnTCP=false;
4195 }
4196 }
4197 else {
4198 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4199 for(const auto fd : threadInfo.tcpSockets) {
4200 t_fdm->addReadFD(fd, handleNewTCPQuestion);
4201 }
4202 listenOnTCP=true;
4203 }
4204 }
4205 }
4206 }
4207 }
4208 catch(PDNSException &ae) {
4209 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4210 return 0;
4211 }
4212 catch(std::exception &e) {
4213 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4214 return 0;
4215 }
4216 catch(...) {
4217 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4218 return 0;
4219 }
4220
4221
4222 int main(int argc, char **argv)
4223 {
4224 g_argc = argc;
4225 g_argv = argv;
4226 g_stats.startupTime=time(0);
4227 Utility::srandom();
4228 versionSetProduct(ProductRecursor);
4229 reportBasicTypes();
4230 reportOtherTypes();
4231
4232 int ret = EXIT_SUCCESS;
4233
4234 try {
4235 ::arg().set("stack-size","stack size per mthread")="200000";
4236 ::arg().set("soa-minimum-ttl","Don't change")="0";
4237 ::arg().set("no-shuffle","Don't change")="off";
4238 ::arg().set("local-port","port to listen on")="53";
4239 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4240 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4241 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4242 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4243 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4244 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4245 ::arg().set("daemon","Operate as a daemon")="no";
4246 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4247 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4248 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4249 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4250 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4251 ::arg().set("chroot","switch to chroot jail")="";
4252 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4253 ::arg().set("setuid","If set, change user id to this uid for more security")="";
4254 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4255 ::arg().set("threads", "Launch this number of threads")="2";
4256 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4257 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4258 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4259 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4260 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4261 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4262 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4263 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4264 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4265 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4266 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4267 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4268 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4269 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4270 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4271
4272 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4273 ::arg().set("quiet","Suppress logging of questions and answers")="";
4274 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4275 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
4276 ::arg().set("socket-owner","Owner of socket")="";
4277 ::arg().set("socket-group","Group of socket")="";
4278 ::arg().set("socket-mode", "Permissions for socket")="";
4279
4280 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
4281 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4282 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4283 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4284 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4285 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4286 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4287 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4288 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4289 ::arg().set("hint-file", "If set, load root hints from this file")="";
4290 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4291 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4292 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4293 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4294 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4295 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4296 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4297 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4298 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4299 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4300 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
4301 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4302 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4303 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4304 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4305 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4306 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4307 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4308 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4309 ::arg().set("lua-config-file", "More powerful configuration options")="";
4310
4311 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4312 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4313 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4314 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4315 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
4316 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
4317 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4318 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
4319 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
4320 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
4321 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
4322 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4323 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
4324 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
4325 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
4326 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
4327 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
4328 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
4329 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4330 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
4331 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
4332 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
4333 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
4334 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4335 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
4336 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
4337 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
4338 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
4339 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
4340 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
4341
4342 ::arg().set("include-dir","Include *.conf files from this directory")="";
4343 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
4344
4345 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
4346
4347 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
4348 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
4349
4350 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4351 for (size_t idx = 0; idx < 32; idx++) {
4352 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4353 }
4354 for (size_t idx = 0; idx < 128; idx++) {
4355 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4356 }
4357 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4358 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4359 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4360 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
4361
4362 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
4363 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
4364
4365 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4366
4367 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4368
4369 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
4370 ::arg().set("xpf-rr-code","XPF option code to use")="0";
4371
4372 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
4373 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4374 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
4375 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
4376 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
4377 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
4378 #ifdef NOD_ENABLED
4379 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4380 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4381 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4382 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4383 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4384 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
4385 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
4386 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4387 ::arg().set("unique-response-log", "Log unique responses")="yes";
4388 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
4389 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
4390 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
4391 #endif /* NOD_ENABLED */
4392 ::arg().setCmd("help","Provide a helpful message");
4393 ::arg().setCmd("version","Print version string");
4394 ::arg().setCmd("config","Output blank configuration");
4395 g_log.toConsole(Logger::Info);
4396 ::arg().laxParse(argc,argv); // do a lax parse
4397
4398 string configname=::arg()["config-dir"]+"/recursor.conf";
4399 if(::arg()["config-name"]!="") {
4400 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4401 s_programname+="-"+::arg()["config-name"];
4402 }
4403 cleanSlashes(configname);
4404
4405 if(!::arg().getCommands().empty()) {
4406 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4407 exit(99);
4408 }
4409
4410 if(::arg().mustDo("config")) {
4411 cout<<::arg().configstring()<<endl;
4412 exit(0);
4413 }
4414
4415 if(!::arg().file(configname.c_str()))
4416 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
4417
4418 ::arg().parse(argc,argv);
4419
4420 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4421 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
4422 exit(EXIT_FAILURE);
4423 }
4424
4425 if (::arg()["socket-dir"].empty()) {
4426 if (::arg()["chroot"].empty())
4427 ::arg().set("socket-dir") = LOCALSTATEDIR;
4428 else
4429 ::arg().set("socket-dir") = "/";
4430 }
4431
4432 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
4433
4434 if(::arg().asNum("threads")==1) {
4435 if (::arg().mustDo("pdns-distributes-queries")) {
4436 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4437 ::arg().set("pdns-distributes-queries")="no";
4438 }
4439 }
4440
4441 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4442 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4443 ::arg().set("distributor-threads")="1";
4444 }
4445
4446 if (!::arg().mustDo("pdns-distributes-queries")) {
4447 ::arg().set("distributor-threads")="0";
4448 }
4449
4450 if(::arg().mustDo("help")) {
4451 cout<<"syntax:"<<endl<<endl;
4452 cout<<::arg().helpstring(::arg()["help"])<<endl;
4453 exit(0);
4454 }
4455 if(::arg().mustDo("version")) {
4456 showProductVersion();
4457 showBuildConfiguration();
4458 exit(0);
4459 }
4460
4461 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
4462
4463 if (logUrgency < Logger::Error)
4464 logUrgency = Logger::Error;
4465 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4466 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4467 }
4468 g_log.setLoglevel(logUrgency);
4469 g_log.toConsole(logUrgency);
4470
4471 serviceMain(argc, argv);
4472 }
4473 catch(PDNSException &ae) {
4474 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4475 ret=EXIT_FAILURE;
4476 }
4477 catch(std::exception &e) {
4478 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4479 ret=EXIT_FAILURE;
4480 }
4481 catch(...) {
4482 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4483 ret=EXIT_FAILURE;
4484 }
4485
4486 return ret;
4487 }