]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
rec: Add a distribution-pipe-buffer-size setting
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
31 #endif
32 #include "ws-recursor.hh"
33 #include <thread>
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
36 #include "utility.hh"
37 #include "dns_random.hh"
38 #ifdef HAVE_LIBSODIUM
39 #include <sodium.h>
40 #endif
41 #include "opensslsigners.hh"
42 #include <iostream>
43 #include <errno.h>
44 #include <boost/static_assert.hpp>
45 #include <map>
46 #include <set>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
49 #include <stdio.h>
50 #include <signal.h>
51 #include <stdlib.h>
52 #include "misc.hh"
53 #include "mtasker.hh"
54 #include <utility>
55 #include "arguments.hh"
56 #include "syncres.hh"
57 #include <fcntl.h>
58 #include <fstream>
59 #include "sortlist.hh"
60 #include "sstuff.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
66 #ifdef MALLOC_TRACE
67 #include "malloctrace.hh"
68 #endif
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
76 #include "logger.hh"
77 #include "iputils.hh"
78 #include "mplexer.hh"
79 #include "config.h"
80 #include "lua-recursor4.hh"
81 #include "version.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
84 #include "dnsname.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
90 #include "gettime.hh"
91 #include "pubsuffix.hh"
92 #ifdef NOD_ENABLED
93 #include "nod.hh"
94 #endif /* NOD_ENABLED */
95
96 #include "rec-protobuf.hh"
97 #include "rec-snmp.hh"
98
99 #ifdef HAVE_SYSTEMD
100 #include <systemd/sd-daemon.h>
101 #endif
102
103 #include "namespaces.hh"
104
105 #ifdef HAVE_PROTOBUF
106 #include "uuid-utils.hh"
107 #endif
108
109 #include "xpf.hh"
110
111 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
113 static thread_local std::shared_ptr<RecursorLua4> t_pdl;
114 static thread_local unsigned int t_id = 0;
115 static thread_local std::shared_ptr<Regex> t_traceRegex;
116 static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
117 #ifdef HAVE_PROTOBUF
118 static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
119 static thread_local uint64_t t_protobufServersGeneration;
120 static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
121 static thread_local uint64_t t_outgoingProtobufServersGeneration;
122 #endif /* HAVE_PROTOBUF */
123
124 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
125 thread_local std::unique_ptr<MemRecursorCache> t_RC;
126 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
127 thread_local FDMultiplexer* t_fdm{nullptr};
128 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
129 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
130 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
131 #ifdef NOD_ENABLED
132 thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
133 thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
134 #endif /* NOD_ENABLED */
135 __thread struct timeval g_now; // timestamp, updated (too) frequently
136
137 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
138
139 // for communicating with our threads
140 // effectively readonly after startup
141 struct RecThreadInfo
142 {
143 struct ThreadPipeSet
144 {
145 int writeToThread{-1};
146 int readToThread{-1};
147 int writeFromThread{-1};
148 int readFromThread{-1};
149 int writeQueriesToThread{-1}; // this one is non-blocking
150 int readQueriesToThread{-1};
151 };
152
153 /* FD corresponding to TCP sockets this thread is listening
154 on.
155 These FDs are also in deferredAdds when we have one
156 socket per listener, and in g_deferredAdds instead. */
157 std::set<int> tcpSockets;
158 /* FD corresponding to listening sockets if we have one socket per
159 listener (with reuseport), otherwise all listeners share the
160 same FD and g_deferredAdds is then used instead */
161 deferredAdd_t deferredAdds;
162 struct ThreadPipeSet pipes;
163 std::thread thread;
164 MT_t* mt{nullptr};
165 uint64_t numberOfDistributedQueries{0};
166 /* handle the web server, carbon, statistics and the control channel */
167 bool isHandler{false};
168 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
169 bool isListener{false};
170 /* process queries */
171 bool isWorker{false};
172 };
173
174 /* first we have the handler thread, t_id == 0 (some other
175 helper threads like SNMP might have t_id == 0 as well)
176 then the distributor threads if any
177 and finally the workers */
178 static std::vector<RecThreadInfo> s_threadInfos;
179 /* without reuseport, all listeners share the same sockets */
180 static deferredAdd_t g_deferredAdds;
181
182 typedef vector<int> tcpListenSockets_t;
183 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
184
185 static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
186 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
187 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
188 static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
189 static AtomicCounter counter;
190 static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
191 static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
192 static NetmaskGroup g_XPFAcl;
193 static size_t g_tcpMaxQueriesPerConn;
194 static size_t s_maxUDPQueriesPerRound;
195 static uint64_t g_latencyStatSize;
196 static uint32_t g_disthashseed;
197 static unsigned int g_maxTCPPerClient;
198 static unsigned int g_maxMThreads;
199 static unsigned int g_numDistributorThreads;
200 static unsigned int g_numWorkerThreads;
201 static int g_tcpTimeout;
202 static uint16_t g_udpTruncationThreshold;
203 static uint16_t g_xpfRRCode{0};
204 static std::atomic<bool> statsWanted;
205 static std::atomic<bool> g_quiet;
206 static bool g_logCommonErrors;
207 static bool g_anyToTcp;
208 static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
209 static bool g_reusePort{false};
210 static bool g_gettagNeedsEDNSOptions{false};
211 static time_t g_statisticsInterval;
212 static bool g_useIncomingECS;
213 static bool g_useKernelTimestamp;
214 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
215 #ifdef NOD_ENABLED
216 static bool g_nodEnabled;
217 static DNSName g_nodLookupDomain;
218 static bool g_nodLog;
219 static SuffixMatchNode g_nodDomainWL;
220 static std::string g_nod_pbtag;
221 static bool g_udrEnabled;
222 static bool g_udrLog;
223 static std::string g_udr_pbtag;
224 #endif /* NOD_ENABLED */
225 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
226 static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
227 #else
228 static std::set<uint16_t> s_avoidUdpSourcePorts;
229 #endif
230 static uint16_t s_minUdpSourcePort;
231 static uint16_t s_maxUdpSourcePort;
232 static double s_balancingFactor;
233
234 RecursorControlChannel s_rcc; // only active in the handler thread
235 RecursorStats g_stats;
236 string s_programname="pdns_recursor";
237 string s_pidfname;
238 bool g_lowercaseOutgoing;
239 unsigned int g_networkTimeoutMsec;
240 unsigned int g_numThreads;
241 uint16_t g_outgoingEDNSBufsize;
242 bool g_logRPZChanges{false};
243
244 // Used in the Syncres to not throttle certain servers
245 GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
246 GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
247
// Comma-separated list of loopback, private, link-local and similar "local" networks
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// Same list as LOCAL_NETS, with every entry negated by a leading '!'
#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Concatenation of LOCAL_NETS and BAD_NETS
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
257
258 //! used to send information to a newborn mthread
259 struct DNSComboWriter {
260 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
261 {
262 }
263
264 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
265 {
266 }
267
268 void setRemote(const ComboAddress& sa)
269 {
270 d_remote=sa;
271 }
272
273 void setSource(const ComboAddress& sa)
274 {
275 d_source=sa;
276 }
277
278 void setLocal(const ComboAddress& sa)
279 {
280 d_local=sa;
281 }
282
283 void setDestination(const ComboAddress& sa)
284 {
285 d_destination=sa;
286 }
287
288 void setSocket(int sock)
289 {
290 d_socket=sock;
291 }
292
293 string getRemote() const
294 {
295 if (d_source == d_remote) {
296 return d_source.toStringWithPort();
297 }
298 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
299 }
300
301 MOADNSParser d_mdp;
302 struct timeval d_now;
303 /* Remote client, might differ from d_source
304 in case of XPF, in which case d_source holds
305 the IP of the client and d_remote of the proxy
306 */
307 ComboAddress d_remote;
308 ComboAddress d_source;
309 /* Destination address, might differ from
310 d_destination in case of XPF, in which case
311 d_destination holds the IP of the proxy and
312 d_local holds our own. */
313 ComboAddress d_local;
314 ComboAddress d_destination;
315 #ifdef HAVE_PROTOBUF
316 boost::uuids::uuid d_uuid;
317 string d_requestorId;
318 string d_deviceId;
319 struct timeval d_kernelTimestamp{0,0};
320 #endif
321 std::string d_query;
322 std::vector<std::string> d_policyTags;
323 LuaContext::LuaObject d_data;
324 EDNSSubnetOpts d_ednssubnet;
325 shared_ptr<TCPConnection> d_tcpConnection;
326 int d_socket;
327 unsigned int d_tag{0};
328 uint32_t d_qhash{0};
329 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
330 uint16_t d_ecsBegin{0};
331 uint16_t d_ecsEnd{0};
332 bool d_variable{false};
333 bool d_ecsFound{false};
334 bool d_ecsParsed{false};
335 bool d_tcp;
336 };
337
338 MT_t* getMT()
339 {
340 return MT ? MT.get() : nullptr;
341 }
342
343 ArgvMap &arg()
344 {
345 static ArgvMap theArg;
346 return theArg;
347 }
348
349 unsigned int getRecursorThreadId()
350 {
351 return t_id;
352 }
353
354 int getMTaskerTID()
355 {
356 return MT->getTid();
357 }
358
359 static bool isDistributorThread()
360 {
361 if (t_id == 0) {
362 return false;
363 }
364
365 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
366 }
367
368 static bool isHandlerThread()
369 {
370 if (t_id == 0) {
371 return true;
372 }
373
374 return s_threadInfos.at(t_id).isHandler;
375 }
376
377 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
378
379 // -1 is error, 0 is timeout, 1 is success
380 int asendtcp(const string& data, Socket* sock)
381 {
382 PacketID pident;
383 pident.sock=sock;
384 pident.outMSG=data;
385
386 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
387 string packet;
388
389 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
390
391 if(!ret || ret==-1) { // timeout
392 t_fdm->removeWriteFD(sock->getHandle());
393 }
394 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
395 return -1;
396 }
397 return ret;
398 }
399
400 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
401
402 // -1 is error, 0 is timeout, 1 is success
403 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
404 {
405 data.clear();
406 PacketID pident;
407 pident.sock=sock;
408 pident.inNeeded=len;
409 pident.inIncompleteOkay=incompleteOkay;
410 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
411
412 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
413 if(!ret || ret==-1) { // timeout
414 t_fdm->removeReadFD(sock->getHandle());
415 }
416 else if(data.empty()) {// error, EOF or other
417 return -1;
418 }
419
420 return ret;
421 }
422
423 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
424 {
425 PacketID pident=*any_cast<PacketID>(&var);
426 char resp[512];
427 ComboAddress fromaddr;
428 socklen_t addrlen=sizeof(fromaddr);
429
430 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
431 if (fromaddr != pident.remote) {
432 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
433
434 }
435
436 t_fdm->removeReadFD(fd);
437 if(ret >= 0) {
438 string data(resp, (size_t) ret);
439 MT->sendEvent(pident, &data);
440 }
441 else {
442 string empty;
443 MT->sendEvent(pident, &empty);
444 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
445 }
446 }
447 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
448 {
449 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
450 s.setNonBlocking();
451 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
452
453 s.bind(local);
454 s.connect(dest);
455 s.send(query);
456
457 PacketID pident;
458 pident.sock=&s;
459 pident.remote=dest;
460 pident.type=0;
461 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
462
463 string data;
464
465 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
466
467 if(!ret || ret==-1) { // timeout
468 t_fdm->removeReadFD(s.getHandle());
469 }
470 else if(data.empty()) {// error, EOF or other
471 // we could special case this
472 return data;
473 }
474 return data;
475 }
476
477 //! pick a random query local address
478 ComboAddress getQueryLocalAddress(int family, uint16_t port)
479 {
480 ComboAddress ret;
481 if(family==AF_INET) {
482 if(g_localQueryAddresses4.empty())
483 ret = g_local4;
484 else
485 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
486 ret.sin4.sin_port = htons(port);
487 }
488 else {
489 if(g_localQueryAddresses6.empty())
490 ret = g_local6;
491 else
492 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
493
494 ret.sin6.sin6_port = htons(port);
495 }
496 return ret;
497 }
498
499 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
500
501 static void setSocketBuffer(int fd, int optname, uint32_t size)
502 {
503 uint32_t psize=0;
504 socklen_t len=sizeof(psize);
505
506 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
507 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
508 return;
509 }
510
511 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
512 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
513 }
514
515
516 static void setSocketReceiveBuffer(int fd, uint32_t size)
517 {
518 setSocketBuffer(fd, SO_RCVBUF, size);
519 }
520
521 static void setSocketSendBuffer(int fd, uint32_t size)
522 {
523 setSocketBuffer(fd, SO_SNDBUF, size);
524 }
525
526
527 // you can ask this class for a UDP socket to send a query from
528 // this socket is not yours, don't even think about deleting it
529 // but after you call 'returnSocket' on it, don't assume anything anymore
530 class UDPClientSocks
531 {
532 unsigned int d_numsocks;
533 public:
534 UDPClientSocks() : d_numsocks(0)
535 {
536 }
537
538 typedef set<int> socks_t;
539 socks_t d_socks;
540
541 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
542 int getSocket(const ComboAddress& toaddr, int* fd)
543 {
544 *fd=makeClientSocket(toaddr.sin4.sin_family);
545 if(*fd < 0) // temporary error - receive exception otherwise
546 return -2;
547
548 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
549 int err = errno;
550 // returnSocket(*fd);
551 try {
552 closesocket(*fd);
553 }
554 catch(const PDNSException& e) {
555 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
556 }
557
558 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
559 return -2;
560 return -1;
561 }
562
563 d_socks.insert(*fd);
564 d_numsocks++;
565 return 0;
566 }
567
568 void returnSocket(int fd)
569 {
570 socks_t::iterator i=d_socks.find(fd);
571 if(i==d_socks.end()) {
572 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
573 }
574 returnSocketLocked(i);
575 }
576
577 // return a socket to the pool, or simply erase it
578 void returnSocketLocked(socks_t::iterator& i)
579 {
580 if(i==d_socks.end()) {
581 throw PDNSException("Trying to return a socket not in the pool");
582 }
583 try {
584 t_fdm->removeReadFD(*i);
585 }
586 catch(FDMultiplexerException& e) {
587 // we sometimes return a socket that has not yet been assigned to t_fdm
588 }
589 try {
590 closesocket(*i);
591 }
592 catch(const PDNSException& e) {
593 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
594 }
595
596 d_socks.erase(i++);
597 --d_numsocks;
598 }
599
600 // returns -1 for errors which might go away, throws for ones that won't
601 static int makeClientSocket(int family)
602 {
603 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
604
605 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
606 return ret;
607
608 if(ret<0)
609 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
610
611 // setCloseOnExec(ret); // we're not going to exec
612
613 int tries=10;
614 ComboAddress sin;
615 while(--tries) {
616 uint16_t port;
617
618 if(tries==1) // fall back to kernel 'random'
619 port = 0;
620 else {
621 do {
622 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
623 }
624 while (s_avoidUdpSourcePorts.count(port));
625 }
626
627 sin=getQueryLocalAddress(family, port); // does htons for us
628
629 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
630 break;
631 }
632 if(!tries)
633 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
634
635 setReceiveSocketErrors(ret, family);
636 setNonBlocking(ret);
637 return ret;
638 }
639 };
640
641 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
642
643 /* these two functions are used by LWRes */
644 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
645 int asendto(const char *data, size_t len, int flags,
646 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
647 {
648
649 PacketID pident;
650 pident.domain = domain;
651 pident.remote = toaddr;
652 pident.type = qtype;
653
654 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
655 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
656
657 for(; chain.first != chain.second; chain.first++) {
658 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
659 /*
660 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
661 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
662 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
663 */
664 chain.first->key.chain.insert(id); // we can chain
665 *fd=-1; // gets used in waitEvent / sendEvent later on
666 return 1;
667 }
668 }
669
670 int ret=t_udpclientsocks->getSocket(toaddr, fd);
671 if(ret < 0)
672 return ret;
673
674 pident.fd=*fd;
675 pident.id=id;
676
677 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
678 ret = send(*fd, data, len, 0);
679
680 int tmp = errno;
681
682 if(ret < 0)
683 t_udpclientsocks->returnSocket(*fd);
684
685 errno = tmp; // this is for logging purposes only
686 return ret;
687 }
688
689 // -1 is error, 0 is timeout, 1 is success
690 int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
691 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
692 {
693 static optional<unsigned int> nearMissLimit;
694 if(!nearMissLimit)
695 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
696
697 PacketID pident;
698 pident.fd=fd;
699 pident.id=id;
700 pident.domain=domain;
701 pident.type = qtype;
702 pident.remote=fromaddr;
703
704 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
705
706 if(ret > 0) {
707 if(packet.empty()) // means "error"
708 return -1;
709
710 *d_len=packet.size();
711
712 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
713 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
714 g_stats.spoofCount++;
715 return -1;
716 }
717 }
718 else {
719 if(fd >= 0)
720 t_udpclientsocks->returnSocket(fd);
721 }
722 return ret;
723 }
724
725 static void writePid(void)
726 {
727 if(!::arg().mustDo("write-pid"))
728 return;
729 ofstream of(s_pidfname.c_str(), std::ios_base::app);
730 if(of)
731 of<< Utility::getpid() <<endl;
732 else
733 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
734 }
735
736 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
737 {
738 ++s_currentConnections;
739 (*t_tcpClientCounts)[d_remote]++;
740 }
741
742 TCPConnection::~TCPConnection()
743 {
744 try {
745 if(closesocket(d_fd) < 0)
746 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
747 }
748 catch(const PDNSException& e) {
749 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
750 }
751
752 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
753 t_tcpClientCounts->erase(d_remote);
754 --s_currentConnections;
755 }
756
757 AtomicCounter TCPConnection::s_currentConnections;
758
759 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
760
761 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
762 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
763 {
764 if(packetsize > 1000 && t_largeanswerremotes)
765 t_largeanswerremotes->push_back(remote);
766 switch(res) {
767 case RCode::ServFail:
768 if(t_servfailremotes) {
769 t_servfailremotes->push_back(remote);
770 if(query && t_servfailqueryring) // packet cache
771 t_servfailqueryring->push_back(make_pair(*query, qtype));
772 }
773 g_stats.servFails++;
774 break;
775 case RCode::NXDomain:
776 g_stats.nxDomains++;
777 break;
778 case RCode::NoError:
779 g_stats.noErrors++;
780 break;
781 }
782 }
783
784 static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
785 try
786 {
787 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
788 }
789 catch(...)
790 {
791 return "Exception making error message for exception";
792 }
793
794 #ifdef HAVE_PROTOBUF
795 static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
796 {
797 if (!t_protobufServers) {
798 return;
799 }
800
801 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
802 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
803 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
804 message.setServerIdentity(SyncRes::s_serverID);
805 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
806 message.setRequestorId(requestorId);
807 message.setDeviceId(deviceId);
808
809 if (!policyTags.empty()) {
810 message.setPolicyTags(policyTags);
811 }
812
813 // cerr <<message.toDebugString()<<endl;
814 std::string str;
815 message.serialize(str);
816
817 for (auto& server : *t_protobufServers) {
818 server->queueData(str);
819 }
820 }
821
822 static void protobufLogResponse(const RecProtoBufMessage& message)
823 {
824 if (!t_protobufServers) {
825 return;
826 }
827
828 // cerr <<message.toDebugString()<<endl;
829 std::string str;
830 message.serialize(str);
831
832 for (auto& server : *t_protobufServers) {
833 server->queueData(str);
834 }
835 }
836 #endif
837
838 /**
839 * Chases the CNAME provided by the PolicyCustom RPZ policy.
840 *
841 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
842 * @param qtype: The QType of the original query
843 * @param sr: A SyncRes
844 * @param res: An integer that will contain the RCODE of the lookup we do
845 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
846 */
847 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
848 {
849 if (spoofed.d_type == QType::CNAME) {
850 bool oldWantsRPZ = sr.getWantsRPZ();
851 sr.setWantsRPZ(false);
852 vector<DNSRecord> ans;
853 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
854 for (const auto& rec : ans) {
855 if(rec.d_place == DNSResourceRecord::ANSWER) {
856 ret.push_back(rec);
857 }
858 }
859 // Reset the RPZ state of the SyncRes
860 sr.setWantsRPZ(oldWantsRPZ);
861 }
862 }
863
864 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
865 {
866 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
867
868 if(rec.d_type != QType::OPT) // their TTL ain't real
869 minTTL = min(minTTL, rec.d_ttl);
870
871 rec.d_content->toPacket(pw);
872 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
873 pw.rollback();
874 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
875 pw.getHeader()->tc=1;
876 pw.truncate();
877 }
878 return false;
879 }
880
881 return true;
882 }
883
884 #ifdef HAVE_PROTOBUF
885 static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
886 {
887 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
888
889 for (const auto& server : config.servers) {
890 try {
891 result->emplace_back(new RemoteLogger(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
892 }
893 catch(const std::exception& e) {
894 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
895 }
896 catch(const PDNSException& e) {
897 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
898 }
899 }
900
901 return result;
902 }
903
904 static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
905 {
906 if (!luaconfsLocal->protobufExportConfig.enabled) {
907 if (t_protobufServers) {
908 for (auto& server : *t_protobufServers) {
909 server->stop();
910 }
911 t_protobufServers.reset();
912 }
913
914 return false;
915 }
916
917 /* if the server was not running, or if it was running according to a
918 previous configuration */
919 if (!t_protobufServers ||
920 t_protobufServersGeneration < luaconfsLocal->generation) {
921
922 if (t_protobufServers) {
923 for (auto& server : *t_protobufServers) {
924 server->stop();
925 }
926 }
927 t_protobufServers.reset();
928
929 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
930 t_protobufServersGeneration = luaconfsLocal->generation;
931 }
932
933 return true;
934 }
935
936 static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
937 {
938 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
939 if (t_outgoingProtobufServers) {
940 for (auto& server : *t_outgoingProtobufServers) {
941 server->stop();
942 }
943 }
944 t_outgoingProtobufServers.reset();
945
946 return false;
947 }
948
949 /* if the server was not running, or if it was running according to a
950 previous configuration */
951 if (!t_outgoingProtobufServers ||
952 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
953
954 if (t_outgoingProtobufServers) {
955 for (auto& server : *t_outgoingProtobufServers) {
956 server->stop();
957 }
958 }
959 t_outgoingProtobufServers.reset();
960
961 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
962 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
963 }
964
965 return true;
966 }
967 #endif /* HAVE_PROTOBUF */
968
969 #ifdef NOD_ENABLED
970 static bool nodCheckNewDomain(const DNSName& dname)
971 {
972 static const QType qt(QType::A);
973 static const uint16_t qc(QClass::IN);
974 bool ret = false;
975 // First check the (sub)domain isn't whitelisted for NOD purposes
976 if (!g_nodDomainWL.check(dname)) {
977 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
978 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
979 if (g_nodLog) {
980 // This should probably log to a dedicated log file
981 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
982 }
983 if (!(g_nodLookupDomain.isRoot())) {
984 // Send a DNS A query to <domain>.g_nodLookupDomain
985 DNSName qname = dname;
986 vector<DNSRecord> dummy;
987 qname += g_nodLookupDomain;
988 directResolve(qname, qt, qc, dummy);
989 }
990 ret = true;
991 }
992 }
993 return ret;
994 }
995
996 static void nodAddDomain(const DNSName& dname)
997 {
998 // Don't bother adding domains on the nod whitelist
999 if (!g_nodDomainWL.check(dname)) {
1000 if (t_nodDBp) {
1001 // This keeps the nod info up to date
1002 t_nodDBp->addDomain(dname);
1003 }
1004 }
1005 }
1006
1007 static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1008 {
1009 bool ret = false;
1010 if (record.d_place == DNSResourceRecord::ANSWER ||
1011 record.d_place == DNSResourceRecord::ADDITIONAL) {
1012 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1013 std::stringstream ss;
1014 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1015 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
1016 if (g_udrLog) {
1017 // This should also probably log to a dedicated file.
1018 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
1019 }
1020 ret = true;
1021 }
1022 }
1023 return ret;
1024 }
1025 #endif /* NOD_ENABLED */
1026
/* Resolve one client query and send the answer back to the client.
   'p' is a pointer to a DNSComboWriter; ownership is taken immediately
   (it is wrapped in a unique_ptr), so the object is destroyed when this
   function returns, whatever the exit path.
   Rough flow:
   - read the EDNS data of the query (version, payload size, ECS, NSID),
   - prepare the response writer and a SyncRes for this query,
   - let the Lua hooks and the RPZ policies answer, alter or drop the query,
   otherwise call SyncRes::beginResolve(),
   - apply the DNSSEC validation state to the header / rcode,
   - write the records and the OPT RR into the packet,
   - export the response over protobuf when enabled,
   - send over UDP (and possibly insert into the packet cache) or over TCP,
   - update latency and cache statistics.
   Exceptions are caught and logged here; none is propagated to the caller. */
1027 static void startDoResolve(void *p)
1028 {
1029 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
1030 try {
1031 if (t_queryring)
1032 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1033
// default size limit: 65535 over TCP, otherwise 512 or g_udpTruncationThreshold, whichever is lower;
// revisited below when the query carries EDNS
1034 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
1035 EDNSOpts edo;
1036 std::vector<pair<uint16_t, string> > ednsOpts;
1037 bool variableAnswer = dc->d_variable;
1038 bool haveEDNS=false;
1039 #ifdef NOD_ENABLED
1040 bool hasUDR = false;
1041 #endif /* NOD_ENABLED */
1042 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1043 uint8_t ednsExtRCode = 0;
1044 if(getEDNSOpts(dc->d_mdp, &edo)) {
1045 haveEDNS=true;
1046 if (edo.d_version != 0) {
1047 ednsExtRCode = ERCode::BADVERS;
1048 }
1049
1050 if(!dc->d_tcp) {
1051 /* rfc6891 6.2.3:
1052 "Values lower than 512 MUST be treated as equal to 512."
1053 */
1054 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1055 }
1056 ednsOpts = edo.d_options;
1057 maxanswersize -= 11; // EDNS header size
1058
1059 for (const auto& o : edo.d_options) {
1060 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1061 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1062 } else if (o.first == EDNSOptionCode::NSID) {
1063 const static string mode_server_id = ::arg()["server-id"];
1064 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1065 maxanswersize > (2 + 2 + mode_server_id.size())) {
1066 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1067 variableAnswer = true; // Can't packetcache an answer with NSID
1068 // Option Code and Option Length are both 2
1069 maxanswersize -= 2 + 2 + mode_server_id.size();
1070 }
1071 }
1072 }
1073 }
1074 /* perhaps there was no EDNS or no ECS but by now we looked */
1075 dc->d_ecsParsed = true;
1076 vector<DNSRecord> ret;
1077 vector<uint8_t> packet;
1078
1079 auto luaconfsLocal = g_luaconfs.getLocal();
1080 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1081 bool wantsRPZ(true);
1082 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
1083 bool logResponse = false;
1084 #ifdef HAVE_PROTOBUF
1085 if (checkProtobufExport(luaconfsLocal)) {
1086 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
1087 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1088 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1089 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
1090 pbMessage->setServerIdentity(SyncRes::s_serverID);
1091 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1092 }
1093 #endif /* HAVE_PROTOBUF */
1094
1095 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
1096
1097 pw.getHeader()->aa=0;
1098 pw.getHeader()->ra=1;
1099 pw.getHeader()->qr=1;
1100 pw.getHeader()->tc=0;
1101 pw.getHeader()->id=dc->d_mdp.d_header.id;
1102 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
1103 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
1104
1105 /* This is the lowest TTL seen in the records of the response,
1106 so we can't cache it for longer than this value.
1107 If we have a TTL cap, this value can't be larger than the
1108 cap no matter what. */
1109 uint32_t minTTL = dc->d_ttlCap;
1110
1111 SyncRes sr(dc->d_now);
1112
1113 bool DNSSECOK=false;
1114 if(t_pdl) {
1115 sr.setLuaEngine(t_pdl);
1116 }
1117 if(g_dnssecmode != DNSSECMode::Off) {
1118 sr.setDoDNSSEC(true);
1119
1120 // Does the requestor want DNSSEC records?
1121 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
1122 DNSSECOK=true;
1123 g_stats.dnssecQueries++;
1124 }
1125 if (dc->d_mdp.d_header.cd) {
1126 /* Per rfc6840 section 5.9, "When processing a request with
1127 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1128 to return all response data, even data that has failed DNSSEC
1129 validation. */
1130 ++g_stats.dnssecCheckDisabledQueries;
1131 }
1132 if (dc->d_mdp.d_header.ad) {
1133 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1134 indicating that the requester understands and is interested in the
1135 value of the AD bit in the response. This allows a requester to
1136 indicate that it understands the AD bit without also requesting
1137 DNSSEC data via the DO bit. */
1138 ++g_stats.dnssecAuthenticDataQueries;
1139 }
1140 } else {
1141 // Ignore the client-set CD flag
1142 pw.getHeader()->cd=0;
1143 }
1144 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1145
1146 #ifdef HAVE_PROTOBUF
1147 sr.setInitialRequestId(dc->d_uuid);
1148 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
1149 #endif
1150
1151 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
1152
1153 bool tracedQuery=false; // we could consider letting Lua know about this too
1154 bool shouldNotValidate = false;
1155
1156 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1157 int res = RCode::NoError;
1158 DNSFilterEngine::Policy appliedPolicy;
1159 std::vector<DNSRecord> spoofed;
1160 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
1161 dq.ednsFlags = &edo.d_extFlags;
1162 dq.ednsOptions = &ednsOpts;
1163 dq.tag = dc->d_tag;
1164 dq.discardedPolicies = &sr.d_discardedPolicies;
1165 dq.policyTags = &dc->d_policyTags;
1166 dq.appliedPolicy = &appliedPolicy;
1167 dq.currentRecords = &ret;
1168 dq.dh = &dc->d_mdp.d_header;
1169 dq.data = dc->d_data;
1170 #ifdef HAVE_PROTOBUF
1171 dq.requestorId = dc->d_requestorId;
1172 dq.deviceId = dc->d_deviceId;
1173 #endif
1174
// unsupported EDNS version (BADVERS was set above): skip resolution entirely,
// the extended rcode is placed in the OPT RR added after 'sendit'
1175 if(ednsExtRCode != 0) {
1176 goto sendit;
1177 }
1178
// ANY over UDP with g_anyToTcp set: answer with TC=1 and no records
1179 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
1180 pw.getHeader()->tc = 1;
1181 res = 0;
1182 variableAnswer = true;
1183 goto sendit;
1184 }
1185
1186 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
1187 sr.setLogMode(SyncRes::Store);
1188 tracedQuery=true;
1189 }
1190
1191
1192 if(!g_quiet || tracedQuery) {
1193 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
1194 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
1195 if(!dc->d_ednssubnet.source.empty()) {
1196 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
1197 }
1198 g_log<<endl;
1199 }
1200
1201 sr.setId(MT->getTid());
1202 if(!dc->d_mdp.d_header.rd)
1203 sr.setCacheOnly();
1204
1205 if (t_pdl) {
1206 t_pdl->prerpz(dq, res);
1207 }
1208
1209 // Check if the query has a policy attached to it
1210 if (wantsRPZ) {
1211 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
1212 }
1213
1214 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1215 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
1216
1217 sr.setWantsRPZ(wantsRPZ);
1218 if(wantsRPZ) {
1219 switch(appliedPolicy.d_kind) {
1220 case DNSFilterEngine::PolicyKind::NoAction:
1221 break;
1222 case DNSFilterEngine::PolicyKind::Drop:
1223 g_stats.policyDrops++;
1224 g_stats.policyResults[appliedPolicy.d_kind]++;
1225 return;
1226 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1227 g_stats.policyResults[appliedPolicy.d_kind]++;
1228 res=RCode::NXDomain;
1229 goto haveAnswer;
1230 case DNSFilterEngine::PolicyKind::NODATA:
1231 g_stats.policyResults[appliedPolicy.d_kind]++;
1232 res=RCode::NoError;
1233 goto haveAnswer;
1234 case DNSFilterEngine::PolicyKind::Custom:
1235 g_stats.policyResults[appliedPolicy.d_kind]++;
1236 res=RCode::NoError;
1237 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1238 for (const auto& dr : spoofed) {
1239 ret.push_back(dr);
1240 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1241 }
1242 goto haveAnswer;
1243 case DNSFilterEngine::PolicyKind::Truncate:
1244 if(!dc->d_tcp) {
1245 g_stats.policyResults[appliedPolicy.d_kind]++;
1246 res=RCode::NoError;
1247 pw.getHeader()->tc=1;
1248 goto haveAnswer;
1249 }
1250 break;
1251 }
1252 }
1253
1254 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
1255 try {
1256 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
1257 shouldNotValidate = sr.wasOutOfBand();
1258 }
1259 catch(ImmediateServFailException &e) {
1260 if(g_logCommonErrors)
1261 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
1262 res = RCode::ServFail;
1263 }
1264
1265 dq.validationState = sr.getValidationState();
1266
1267 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1268 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1269 appliedPolicy = sr.d_appliedPolicy;
1270 g_stats.policyResults[appliedPolicy.d_kind]++;
1271 switch(appliedPolicy.d_kind) {
1272 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1273 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1274 case DNSFilterEngine::PolicyKind::Drop:
1275 g_stats.policyDrops++;
1276 return;
1277 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1278 ret.clear();
1279 res=RCode::NXDomain;
1280 goto haveAnswer;
1281
1282 case DNSFilterEngine::PolicyKind::NODATA:
1283 ret.clear();
1284 res=RCode::NoError;
1285 goto haveAnswer;
1286
1287 case DNSFilterEngine::PolicyKind::Truncate:
1288 if(!dc->d_tcp) {
1289 ret.clear();
1290 res=RCode::NoError;
1291 pw.getHeader()->tc=1;
1292 goto haveAnswer;
1293 }
1294 break;
1295
1296 case DNSFilterEngine::PolicyKind::Custom:
1297 ret.clear();
1298 res=RCode::NoError;
1299 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1300 for (const auto& dr : spoofed) {
1301 ret.push_back(dr);
1302 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1303 }
1304 goto haveAnswer;
1305 }
1306 }
1307
1308 if (wantsRPZ) {
1309 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
1310 }
1311
1312 if(t_pdl) {
1313 if(res == RCode::NoError) {
1314 auto i=ret.cbegin();
1315 for(; i!= ret.cend(); ++i)
1316 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1317 break;
1318 if(i == ret.cend() && t_pdl->nodata(dq, res))
1319 shouldNotValidate = true;
1320
1321 }
1322 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
1323 shouldNotValidate = true;
1324
1325 if(t_pdl->postresolve(dq, res))
1326 shouldNotValidate = true;
1327 }
1328
1329 if (wantsRPZ) { //XXX This block is repeated, see above
1330 g_stats.policyResults[appliedPolicy.d_kind]++;
1331 switch(appliedPolicy.d_kind) {
1332 case DNSFilterEngine::PolicyKind::NoAction:
1333 break;
1334 case DNSFilterEngine::PolicyKind::Drop:
1335 g_stats.policyDrops++;
1336 return;
1337 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1338 ret.clear();
1339 res=RCode::NXDomain;
1340 goto haveAnswer;
1341
1342 case DNSFilterEngine::PolicyKind::NODATA:
1343 ret.clear();
1344 res=RCode::NoError;
1345 goto haveAnswer;
1346
1347 case DNSFilterEngine::PolicyKind::Truncate:
1348 if(!dc->d_tcp) {
1349 ret.clear();
1350 res=RCode::NoError;
1351 pw.getHeader()->tc=1;
1352 goto haveAnswer;
1353 }
1354 break;
1355
1356 case DNSFilterEngine::PolicyKind::Custom:
1357 ret.clear();
1358 res=RCode::NoError;
1359 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1360 for (const auto& dr : spoofed) {
1361 ret.push_back(dr);
1362 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1363 }
1364 goto haveAnswer;
1365 }
1366 }
1367 }
// reached with 'res' and 'ret' filled in by a policy, by the Lua hooks or by the resolver
1368 haveAnswer:;
1369 if(res == PolicyDecision::DROP) {
1370 g_stats.policyDrops++;
1371 return;
1372 }
1373 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1374 {
1375 string trace(sr.getTrace());
1376 if(!trace.empty()) {
1377 vector<string> lines;
1378 boost::split(lines, trace, boost::is_any_of("\n"));
1379 for(const string& line : lines) {
1380 if(!line.empty())
1381 g_log<<Logger::Warning<< line << endl;
1382 }
1383 }
1384 }
1385
// a result of -1 is answered as ServFail, without any record
1386 if(res == -1) {
1387 pw.getHeader()->rcode=RCode::ServFail;
1388 // no commit here, because no record
1389 g_stats.servFails++;
1390 }
1391 else {
1392 pw.getHeader()->rcode=res;
1393
1394 // Does the validation mode or query demand validation?
1395 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1396 try {
1397 if(sr.doLog()) {
1398 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
1399 }
1400
1401 auto state = sr.getValidationState();
1402
1403 if(state == Secure) {
1404 if(sr.doLog()) {
1405 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
1406 }
1407
1408 // Is the query source interested in the value of the ad-bit?
1409 if (dc->d_mdp.d_header.ad || DNSSECOK)
1410 pw.getHeader()->ad=1;
1411 }
1412 else if(state == Insecure) {
1413 if(sr.doLog()) {
1414 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
1415 }
1416
1417 pw.getHeader()->ad=0;
1418 }
1419 else if(state == Bogus) {
1420 if(t_bogusremotes)
1421 t_bogusremotes->push_back(dc->d_source);
1422 if(t_bogusqueryring)
1423 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1424 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1425 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
1426 }
1427
1428 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1429 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1430 if(sr.doLog()) {
1431 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1432 }
1433
1434 pw.getHeader()->rcode=RCode::ServFail;
1435 goto sendit;
1436 } else {
1437 if(sr.doLog()) {
1438 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1439 }
1440 }
1441 }
1442 }
1443 catch(ImmediateServFailException &e) {
1444 if(g_logCommonErrors)
1445 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1446 pw.getHeader()->rcode=RCode::ServFail;
1447 goto sendit;
1448 }
1449 }
1450
1451 if(ret.size()) {
1452 orderAndShuffle(ret);
1453 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
1454 stable_sort(ret.begin(), ret.end(), *sl);
1455 variableAnswer=true;
1456 }
1457 }
1458
1459 bool needCommit = false;
1460 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
// when DNSSECOK is not set for this query: leave out NSEC3 records, and leave out
// RRSIG/NSEC records unless they sit in the answer section and the query type is
// that same type or ANY
1461 if( ! DNSSECOK &&
1462 ( i->d_type == QType::NSEC3 ||
1463 (
1464 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1465 (
1466 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1467 i->d_place != DNSResourceRecord::ANSWER
1468 )
1469 )
1470 )
1471 ) {
1472 continue;
1473 }
1474
// stop at the first record that could not be added; needCommit is reset so the
// commit after the loop is skipped
1475 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1476 needCommit = false;
1477 break;
1478 }
1479 needCommit = true;
1480
1481 #ifdef NOD_ENABLED
1482 bool udr = false;
1483 if (g_udrEnabled) {
1484 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
1485 if (!hasUDR && udr)
1486 hasUDR = true;
1487 }
1488 #endif /* NOD ENABLED */
1489
1490 #ifdef HAVE_PROTOBUF
1491 if (t_protobufServers) {
1492 #ifdef NOD_ENABLED
1493 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1494 #else
1495 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
1496 #endif /* NOD_ENABLED */
1497 }
1498 #endif
1499 }
1500 if(needCommit)
1501 pw.commit();
1502 }
// final stage, also the target of the early exits above: add the EDNS data,
// account for the response, export it and send it
1503 sendit:;
1504
1505 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
1506 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
1507 EDNSSubnetOpts eo;
1508 eo.source = dc->d_ednssubnet.source;
1509 ComboAddress sa;
1510 sa.reset();
1511 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1512 eo.scope = Netmask(sa, 0);
1513
1514 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1515 }
1516
1517 if (haveEDNS) {
1518 /* we try to add the EDNS OPT RR even for truncated answers,
1519 as rfc6891 states:
1520 "The minimal response MUST be the DNS header, question section, and an
1521 OPT record. This MUST also occur when a truncated response (using
1522 the DNS header's TC bit) is returned."
1523 */
1524 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1525 pw.commit();
1526 }
1527
1528 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1529 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1530 #ifdef NOD_ENABLED
1531 bool nod = false;
1532 if (g_nodEnabled) {
1533 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1534 nod = true;
1535 }
1536 #endif /* NOD_ENABLED */
1537 #ifdef HAVE_PROTOBUF
1538 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
1539 pbMessage->setBytes(packet.size());
1540 pbMessage->setResponseCode(pw.getHeader()->rcode);
1541 if (appliedPolicy.d_name) {
1542 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1543 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
1544 }
1545 pbMessage->setPolicyTags(dc->d_policyTags);
1546 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1547 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1548 }
1549 else {
1550 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1551 }
1552 pbMessage->setRequestorId(dq.requestorId);
1553 pbMessage->setDeviceId(dq.deviceId);
1554 #ifdef NOD_ENABLED
1555 if (g_nodEnabled) {
1556 if (nod) {
1557 pbMessage->setNOD(true);
1558 pbMessage->addPolicyTag(g_nod_pbtag);
1559 }
1560 if (hasUDR) {
1561 pbMessage->addPolicyTag(g_udr_pbtag);
1562 }
1563 }
1564 #endif /* NOD_ENABLED */
1565 protobufLogResponse(*pbMessage);
1566 #ifdef NOD_ENABLED
// undo the NOD/UDR markings after logging; pbMessage may still be handed to the
// packet cache further down
1567 if (g_nodEnabled) {
1568 pbMessage->setNOD(false);
1569 pbMessage->clearUDR();
1570 if (nod)
1571 pbMessage->removePolicyTag(g_nod_pbtag);
1572 if (hasUDR)
1573 pbMessage->removePolicyTag(g_udr_pbtag);
1574 }
1575 #endif /* NOD_ENABLED */
1576 }
1577 #endif
// UDP: send the datagram, then store the answer in the packet cache unless it is variable
1578 if(!dc->d_tcp) {
1579 struct msghdr msgh;
1580 struct iovec iov;
1581 char cbuf[256];
1582 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1583 msgh.msg_control=NULL;
1584
1585 if(g_fromtosockets.count(dc->d_socket)) {
1586 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1587 }
1588 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1589 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
1590
1591 if(variableAnswer || sr.wasVariable()) {
1592 g_stats.variableResponses++;
1593 }
1594 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1595 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1596 string((const char*)&*packet.begin(), packet.size()),
1597 g_now.tv_sec,
1598 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1599 min(minTTL,SyncRes::s_packetcachettl),
1600 dq.validationState,
1601 dc->d_ecsBegin,
1602 dc->d_ecsEnd,
1603 std::move(pbMessage));
1604 }
1605 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1606 }
1607 else {
// TCP: the answer is preceded by its length, written as two bytes, high byte first
1608 char buf[2];
1609 buf[0]=packet.size()/256;
1610 buf[1]=packet.size()%256;
1611
1612 Utility::iovec iov[2];
1613
1614 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1615 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1616
1617 int wret=Utility::writev(dc->d_socket, iov, 2);
1618 bool hadError=true;
1619
1620 if(wret == 0)
1621 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1622 else if(wret < 0 )
1623 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1624 else if((unsigned int)wret != 2 + packet.size())
1625 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1626 else
1627 hadError=false;
1628
1629 // update tcp connection status, either by closing or moving to 'BYTE0'
1630
1631 if(hadError) {
1632 // no need to remove us from FDM, we weren't there
1633 dc->d_socket = -1;
1634 }
1635 else {
1636 dc->d_tcpConnection->queriesCount++;
1637 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1638 dc->d_socket = -1;
1639 }
1640 else {
1641 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1642 Utility::gettimeofday(&g_now, 0); // needs to be updated
1643 struct timeval ttd = g_now;
1644 ttd.tv_sec += g_tcpTimeout;
1645
1646 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
1647 }
1648 }
1649 }
// 'spent' is the total time for this query, in seconds
1650 float spent=makeFloat(sr.getNow()-dc->d_now);
1651 if(!g_quiet) {
1652 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1653 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1654 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1655 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1656
1657 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1658 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
1659 }
1660
1661 g_log<<endl;
1662
1663 }
1664
1665 if (sr.d_outqueries || sr.d_authzonequeries) {
1666 t_RC->cacheMisses++;
1667 }
1668 else {
1669 t_RC->cacheHits++;
1670 }
1671
1672 if(spent < 0.001)
1673 g_stats.answers0_1++;
1674 else if(spent < 0.010)
1675 g_stats.answers1_10++;
1676 else if(spent < 0.1)
1677 g_stats.answers10_100++;
1678 else if(spent < 1.0)
1679 g_stats.answers100_1000++;
1680 else
1681 g_stats.answersSlow++;
1682
1683 uint64_t newLat=(uint64_t)(spent*1000000);
1684 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1685 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1686 // no worries, we do this for packet cache hits elsewhere
1687
// total time minus the time accounted to the network by the resolver (sr.d_totUsec)
1688 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1689 if(ourtime < 1)
1690 g_stats.ourtime0_1++;
1691 else if(ourtime < 2)
1692 g_stats.ourtime1_2++;
1693 else if(ourtime < 4)
1694 g_stats.ourtime2_4++;
1695 else if(ourtime < 8)
1696 g_stats.ourtime4_8++;
1697 else if(ourtime < 16)
1698 g_stats.ourtime8_16++;
1699 else if(ourtime < 32)
1700 g_stats.ourtime16_32++;
1701 else {
1702 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1703 g_stats.ourtimeSlow++;
1704 }
1705 if(ourtime >= 0.0) {
1706 newLat=ourtime*1000; // usec
1707 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1708 }
1709 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1710 }
1711 catch(PDNSException &ae) {
1712 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1713 }
1714 catch(const MOADNSException &mde) {
1715 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
1716 }
1717 catch(std::exception& e) {
1718 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1719
1720 // Luawrapper nests the exception from Lua, so we unnest it here
1721 try {
1722 std::rethrow_if_nested(e);
1723 } catch(const std::exception& ne) {
1724 g_log<<". Extra info: "<<ne.what();
1725 } catch(...) {}
1726
1727 g_log<<endl;
1728 }
1729 catch(...) {
1730 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1731 }
1732
// executed after normal completion and after a caught exception, but not after the
// early 'return's above (dropped queries)
1733 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1734 }
1735
1736 static void makeControlChannelSocket(int processNum=-1)
1737 {
1738 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1739 if(processNum >= 0)
1740 sockname += "."+std::to_string(processNum);
1741 sockname+=".controlsocket";
1742 s_rcc.listen(sockname);
1743
1744 int sockowner = -1;
1745 int sockgroup = -1;
1746
1747 if (!::arg().isEmpty("socket-group"))
1748 sockgroup=::arg().asGid("socket-group");
1749 if (!::arg().isEmpty("socket-owner"))
1750 sockowner=::arg().asUid("socket-owner");
1751
1752 if (sockgroup > -1 || sockowner > -1) {
1753 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1754 unixDie("Failed to chown control socket");
1755 }
1756 }
1757
1758 // do mode change if socket-mode is given
1759 if(!::arg().isEmpty("socket-mode")) {
1760 mode_t sockmode=::arg().asMode("socket-mode");
1761 if(chmod(sockname.c_str(), sockmode) < 0) {
1762 unixDie("Failed to chmod control socket");
1763 }
1764 }
1765 }
1766
1767 static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1768 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
1769 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
1770 {
1771 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
1772 const bool lookForECS = ednssubnet != nullptr;
1773 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
1774 size_t questionLen = question.length();
1775 unsigned int consumed=0;
1776 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1777
1778 size_t pos= sizeof(dnsheader)+consumed+4;
1779 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1780 const uint16_t arcount = ntohs(dh->arcount);
1781
1782 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1783 if (question.at(pos) != 0) {
1784 /* not an OPT or a XPF, bye. */
1785 return;
1786 }
1787
1788 pos += 1;
1789 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1790 pos += sizeof(dnsrecordheader);
1791
1792 if (pos >= questionLen) {
1793 return;
1794 }
1795
1796 /* OPT root label (1) followed by type (2) */
1797 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
1798 if (!options) {
1799 char* ecsStart = nullptr;
1800 size_t ecsLen = 0;
1801 /* we need to pass the record len */
1802 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1803 if (res == 0 && ecsLen > 4) {
1804 EDNSSubnetOpts eso;
1805 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1806 *ednssubnet=eso;
1807 foundECS = true;
1808 }
1809 }
1810 }
1811 else {
1812 /* we need to pass the record len */
1813 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
1814 if (res == 0) {
1815 const auto& it = options->find(EDNSOptionCode::ECS);
1816 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
1817 EDNSSubnetOpts eso;
1818 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
1819 *ednssubnet=eso;
1820 foundECS = true;
1821 }
1822 }
1823 }
1824 }
1825 }
1826 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
1827 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1828 return;
1829 }
1830
1831 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1832 }
1833
1834 pos += ntohs(drh->d_clen);
1835 }
1836 }
1837
1838 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1839 {
1840 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1841
1842 if(conn->state==TCPConnection::BYTE0) {
1843 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
1844 if(bytes==1)
1845 conn->state=TCPConnection::BYTE1;
1846 if(bytes==2) {
1847 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1848 conn->data.resize(conn->qlen);
1849 conn->bytesread=0;
1850 conn->state=TCPConnection::GETQUESTION;
1851 }
1852 if(!bytes || bytes < 0) {
1853 t_fdm->removeReadFD(fd);
1854 return;
1855 }
1856 }
1857 else if(conn->state==TCPConnection::BYTE1) {
1858 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
1859 if(bytes==1) {
1860 conn->state=TCPConnection::GETQUESTION;
1861 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1862 conn->data.resize(conn->qlen);
1863 conn->bytesread=0;
1864 }
1865 if(!bytes || bytes < 0) {
1866 if(g_logCommonErrors)
1867 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
1868 t_fdm->removeReadFD(fd);
1869 return;
1870 }
1871 }
1872 else if(conn->state==TCPConnection::GETQUESTION) {
1873 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
1874 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1875 if(g_logCommonErrors) {
1876 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1877 }
1878 t_fdm->removeReadFD(fd);
1879 return;
1880 }
1881 conn->bytesread+=(uint16_t)bytes;
1882 if(conn->bytesread==conn->qlen) {
1883 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1884
1885 std::unique_ptr<DNSComboWriter> dc;
1886 try {
1887 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
1888 }
1889 catch(const MOADNSException &mde) {
1890 g_stats.clientParseError++;
1891 if(g_logCommonErrors)
1892 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
1893 return;
1894 }
1895 dc->d_tcpConnection = conn; // carry the torch
1896 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1897 dc->d_tcp=true;
1898 dc->setRemote(conn->d_remote);
1899 dc->setSource(conn->d_remote);
1900 ComboAddress dest;
1901 dest.reset();
1902 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1903 socklen_t len = dest.getSocklen();
1904 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1905 dc->setLocal(dest);
1906 dc->setDestination(dest);
1907 DNSName qname;
1908 uint16_t qtype=0;
1909 uint16_t qclass=0;
1910 bool needECS = false;
1911 bool needXPF = g_XPFAcl.match(conn->d_remote);
1912 string requestorId;
1913 string deviceId;
1914 bool logQuery = false;
1915 #ifdef HAVE_PROTOBUF
1916 auto luaconfsLocal = g_luaconfs.getLocal();
1917 if (checkProtobufExport(luaconfsLocal)) {
1918 needECS = true;
1919 }
1920 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
1921 #endif
1922
1923 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
1924
1925 try {
1926 EDNSOptionViewMap ednsOptions;
1927 bool xpfFound = false;
1928 dc->d_ecsParsed = true;
1929 dc->d_ecsFound = false;
1930 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
1931 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1932 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
1933
1934 if(t_pdl) {
1935 try {
1936 if (t_pdl->d_gettag_ffi) {
1937 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
1938 }
1939 else if (t_pdl->d_gettag) {
1940 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1941 }
1942 }
1943 catch(const std::exception& e) {
1944 if(g_logCommonErrors)
1945 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1946 }
1947 }
1948 }
1949 catch(const std::exception& e)
1950 {
1951 if(g_logCommonErrors)
1952 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1953 }
1954 }
1955
1956 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1957
1958 #ifdef HAVE_PROTOBUF
1959 if(t_protobufServers || t_outgoingProtobufServers) {
1960 dc->d_requestorId = requestorId;
1961 dc->d_deviceId = deviceId;
1962 dc->d_uuid = getUniqueID();
1963 }
1964
1965 if(t_protobufServers) {
1966 try {
1967
1968 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
1969 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1970 }
1971 }
1972 catch(std::exception& e) {
1973 if(g_logCommonErrors)
1974 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1975 }
1976 }
1977 #endif
1978 if(t_pdl) {
1979 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
1980 if(!g_quiet)
1981 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
1982 g_stats.policyDrops++;
1983 return;
1984 }
1985 }
1986
1987 if(dc->d_mdp.d_header.qr) {
1988 g_stats.ignoredCount++;
1989 if(g_logCommonErrors) {
1990 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1991 }
1992 return;
1993 }
1994 if(dc->d_mdp.d_header.opcode) {
1995 g_stats.ignoredCount++;
1996 if(g_logCommonErrors) {
1997 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1998 }
1999 return;
2000 }
2001 else if (dh->qdcount == 0) {
2002 g_stats.emptyQueriesCount++;
2003 if(g_logCommonErrors) {
2004 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2005 }
2006 return;
2007 }
2008 else {
2009 ++g_stats.qcounter;
2010 ++g_stats.tcpqcounter;
2011 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
2012 return;
2013 }
2014 }
2015 }
2016 }
2017
2018 //! Handle new incoming TCP connection
2019 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
2020 {
2021 ComboAddress addr;
2022 socklen_t addrlen=sizeof(addr);
2023 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
2024 if(newsock>=0) {
2025 if(MT->numProcesses() > g_maxMThreads) {
2026 g_stats.overCapacityDrops++;
2027 try {
2028 closesocket(newsock);
2029 }
2030 catch(const PDNSException& e) {
2031 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
2032 }
2033 return;
2034 }
2035
2036 if(t_remotes)
2037 t_remotes->push_back(addr);
2038 if(t_allowFrom && !t_allowFrom->match(&addr)) {
2039 if(!g_quiet)
2040 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2041
2042 g_stats.unauthorizedTCP++;
2043 try {
2044 closesocket(newsock);
2045 }
2046 catch(const PDNSException& e) {
2047 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
2048 }
2049 return;
2050 }
2051 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
2052 g_stats.tcpClientOverflow++;
2053 try {
2054 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2055 }
2056 catch(const PDNSException& e) {
2057 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
2058 }
2059 return;
2060 }
2061
2062 setNonBlocking(newsock);
2063 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
2064 tc->state=TCPConnection::BYTE0;
2065
2066 struct timeval ttd;
2067 Utility::gettimeofday(&ttd, 0);
2068 ttd.tv_sec += g_tcpTimeout;
2069
2070 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
2071 }
2072 }
2073
2074 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
2075 {
2076 gettimeofday(&g_now, 0);
2077 if (tv.tv_sec) {
2078 struct timeval diff = g_now - tv;
2079 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
2080
2081 if(delta > 1000.0) {
2082 g_stats.tooOldDrops++;
2083 return nullptr;
2084 }
2085 }
2086
2087 ++g_stats.qcounter;
2088 if(fromaddr.sin4.sin_family==AF_INET6)
2089 g_stats.ipv6qcounter++;
2090
2091 string response;
2092 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
2093 unsigned int ctag=0;
2094 uint32_t qhash = 0;
2095 bool needECS = false;
2096 bool needXPF = g_XPFAcl.match(fromaddr);
2097 std::vector<std::string> policyTags;
2098 LuaContext::LuaObject data;
2099 ComboAddress source = fromaddr;
2100 ComboAddress destination = destaddr;
2101 string requestorId;
2102 string deviceId;
2103 bool logQuery = false;
2104 #ifdef HAVE_PROTOBUF
2105 boost::uuids::uuid uniqueId;
2106 auto luaconfsLocal = g_luaconfs.getLocal();
2107 if (checkProtobufExport(luaconfsLocal)) {
2108 uniqueId = getUniqueID();
2109 needECS = true;
2110 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
2111 uniqueId = getUniqueID();
2112 }
2113 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2114 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2115 #endif
2116 EDNSSubnetOpts ednssubnet;
2117 bool ecsFound = false;
2118 bool ecsParsed = false;
2119 uint16_t ecsBegin = 0;
2120 uint16_t ecsEnd = 0;
2121 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2122 bool variable = false;
2123 try {
2124 DNSName qname;
2125 uint16_t qtype=0;
2126 uint16_t qclass=0;
2127 uint32_t age;
2128 bool qnameParsed=false;
2129 #ifdef MALLOC_TRACE
2130 /*
2131 static uint64_t last=0;
2132 if(!last)
2133 g_mtracer->clearAllocators();
2134 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2135 last=g_mtracer->getAllocs();
2136 cout<<g_mtracer->topAllocatorsString()<<endl;
2137 g_mtracer->clearAllocators();
2138 */
2139 #endif
2140
2141 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
2142 try {
2143 EDNSOptionViewMap ednsOptions;
2144 bool xpfFound = false;
2145
2146 ecsFound = false;
2147
2148 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2149 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2150 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2151
2152 qnameParsed = true;
2153 ecsParsed = true;
2154
2155 if(t_pdl) {
2156 try {
2157 if (t_pdl->d_gettag_ffi) {
2158 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
2159 }
2160 else if (t_pdl->d_gettag) {
2161 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2162 }
2163 }
2164 catch(const std::exception& e) {
2165 if(g_logCommonErrors)
2166 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2167 }
2168 }
2169 }
2170 catch(const std::exception& e)
2171 {
2172 if(g_logCommonErrors)
2173 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2174 }
2175 }
2176
2177 bool cacheHit = false;
2178 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
2179 #ifdef HAVE_PROTOBUF
2180 if (t_protobufServers) {
2181 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
2182 pbMessage->setServerIdentity(SyncRes::s_serverID);
2183 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
2184 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
2185 }
2186 }
2187 #endif /* HAVE_PROTOBUF */
2188
2189 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2190 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2191 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2192 vState valState;
2193 if (qnameParsed) {
2194 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2195 }
2196 else {
2197 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2198 }
2199
2200 if (cacheHit) {
2201 if(valState == Bogus) {
2202 if(t_bogusremotes)
2203 t_bogusremotes->push_back(source);
2204 if(t_bogusqueryring)
2205 t_bogusqueryring->push_back(make_pair(qname, qtype));
2206 }
2207
2208 #ifdef HAVE_PROTOBUF
2209 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
2210 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2211 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
2212 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2213 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2214 if (g_useKernelTimestamp && tv.tv_sec) {
2215 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2216 }
2217 else {
2218 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2219 }
2220 pbMessage->setRequestorId(requestorId);
2221 pbMessage->setDeviceId(deviceId);
2222 protobufLogResponse(*pbMessage);
2223 }
2224 #endif /* HAVE_PROTOBUF */
2225 if(!g_quiet)
2226 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
2227
2228 g_stats.packetCacheHits++;
2229 SyncRes::s_queries++;
2230 ageDNSPacket(response, age);
2231 struct msghdr msgh;
2232 struct iovec iov;
2233 char cbuf[256];
2234 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2235 msgh.msg_control=NULL;
2236
2237 if(g_fromtosockets.count(fd)) {
2238 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
2239 }
2240 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
2241 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
2242
2243 if(response.length() >= sizeof(struct dnsheader)) {
2244 struct dnsheader tmpdh;
2245 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
2246 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
2247 }
2248 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
2249 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
2250 return 0;
2251 }
2252 }
2253 catch(std::exception& e) {
2254 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
2255 return 0;
2256 }
2257
2258 if(t_pdl) {
2259 if(t_pdl->ipfilter(source, destination, *dh)) {
2260 if(!g_quiet)
2261 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2262 g_stats.policyDrops++;
2263 return 0;
2264 }
2265 }
2266
2267 if(MT->numProcesses() > g_maxMThreads) {
2268 if(!g_quiet)
2269 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
2270
2271 g_stats.overCapacityDrops++;
2272 return 0;
2273 }
2274
2275 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
2276 dc->setSocket(fd);
2277 dc->d_tag=ctag;
2278 dc->d_qhash=qhash;
2279 dc->setRemote(fromaddr);
2280 dc->setSource(source);
2281 dc->setLocal(destaddr);
2282 dc->setDestination(destination);
2283 dc->d_tcp=false;
2284 dc->d_ecsFound = ecsFound;
2285 dc->d_ecsParsed = ecsParsed;
2286 dc->d_ecsBegin = ecsBegin;
2287 dc->d_ecsEnd = ecsEnd;
2288 dc->d_ednssubnet = ednssubnet;
2289 dc->d_ttlCap = ttlCap;
2290 dc->d_variable = variable;
2291 #ifdef HAVE_PROTOBUF
2292 if (t_protobufServers || t_outgoingProtobufServers) {
2293 dc->d_uuid = std::move(uniqueId);
2294 }
2295 dc->d_requestorId = requestorId;
2296 dc->d_deviceId = deviceId;
2297 dc->d_kernelTimestamp = tv;
2298 #endif
2299
2300 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
2301 return 0;
2302 }
2303
2304
2305 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
2306 {
2307 ssize_t len;
2308 static const size_t maxIncomingQuerySize = 512;
2309 static thread_local std::string data;
2310 ComboAddress fromaddr;
2311 struct msghdr msgh;
2312 struct iovec iov;
2313 char cbuf[256];
2314 bool firstQuery = true;
2315
2316 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2317 data.resize(maxIncomingQuerySize);
2318 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2319 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
2320
2321 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
2322
2323 firstQuery = false;
2324
2325 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2326 g_stats.ignoredCount++;
2327 if (!g_quiet) {
2328 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2329 }
2330 return;
2331 }
2332
2333 if (msgh.msg_flags & MSG_TRUNC) {
2334 g_stats.truncatedDrops++;
2335 if (!g_quiet) {
2336 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2337 }
2338 return;
2339 }
2340
2341 if(t_remotes) {
2342 t_remotes->push_back(fromaddr);
2343 }
2344
2345 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2346 if(!g_quiet) {
2347 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2348 }
2349
2350 g_stats.unauthorizedUDP++;
2351 return;
2352 }
2353 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2354 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2355 if(!g_quiet) {
2356 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2357 }
2358
2359 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2360 return;
2361 }
2362
2363 try {
2364 data.resize(static_cast<size_t>(len));
2365 dnsheader* dh=(dnsheader*)&data[0];
2366
2367 if(dh->qr) {
2368 g_stats.ignoredCount++;
2369 if(g_logCommonErrors) {
2370 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2371 }
2372 }
2373 else if(dh->opcode) {
2374 g_stats.ignoredCount++;
2375 if(g_logCommonErrors) {
2376 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2377 }
2378 }
2379 else if (dh->qdcount == 0) {
2380 g_stats.emptyQueriesCount++;
2381 if(g_logCommonErrors) {
2382 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2383 }
2384 }
2385 else {
2386 struct timeval tv={0,0};
2387 HarvestTimestamp(&msgh, &tv);
2388 ComboAddress dest;
2389 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2390 auto loc = rplookup(g_listenSocketsAddresses, fd);
2391 if(HarvestDestinationAddress(&msgh, &dest)) {
2392 // but.. need to get port too
2393 if(loc) {
2394 dest.sin4.sin_port = loc->sin4.sin_port;
2395 }
2396 }
2397 else {
2398 if(loc) {
2399 dest = *loc;
2400 }
2401 else {
2402 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2403 socklen_t slen = dest.getSocklen();
2404 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2405 }
2406 }
2407
2408 if(g_weDistributeQueries) {
2409 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2410 }
2411 else {
2412 ++s_threadInfos[t_id].numberOfDistributedQueries;
2413 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
2414 }
2415 }
2416 }
2417 catch(const MOADNSException &mde) {
2418 g_stats.clientParseError++;
2419 if(g_logCommonErrors) {
2420 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2421 }
2422 }
2423 catch(const std::runtime_error& e) {
2424 g_stats.clientParseError++;
2425 if(g_logCommonErrors) {
2426 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2427 }
2428 }
2429 }
2430 else {
2431 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2432 if(firstQuery && errno == EAGAIN) {
2433 g_stats.noPacketError++;
2434 }
2435
2436 break;
2437 }
2438 }
2439 }
2440
2441 static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
2442 {
2443 int fd;
2444 vector<string>locals;
2445 stringtok(locals,::arg()["local-address"]," ,");
2446
2447 if(locals.empty())
2448 throw PDNSException("No local address specified");
2449
2450 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2451 ServiceTuple st;
2452 st.port=::arg().asNum("local-port");
2453 parseService(*i, st);
2454
2455 ComboAddress sin;
2456
2457 sin.reset();
2458 sin.sin4.sin_family = AF_INET;
2459 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2460 sin.sin6.sin6_family = AF_INET6;
2461 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2462 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
2463 }
2464
2465 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
2466 if(fd<0)
2467 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2468
2469 setCloseOnExec(fd);
2470
2471 int tmp=1;
2472 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
2473 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
2474 exit(1);
2475 }
2476 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
2477 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2478 }
2479
2480 #ifdef TCP_DEFER_ACCEPT
2481 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
2482 if(i==locals.begin())
2483 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
2484 }
2485 #endif
2486
2487 if( ::arg().mustDo("non-local-bind") )
2488 Utility::setBindAny(AF_INET, fd);
2489
2490 #ifdef SO_REUSEPORT
2491 if(g_reusePort) {
2492 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2493 throw PDNSException("SO_REUSEPORT: "+stringerror());
2494 }
2495 #endif
2496
2497 if (::arg().asNum("tcp-fast-open") > 0) {
2498 #ifdef TCP_FASTOPEN
2499 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2500 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
2501 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
2502 }
2503 #else
2504 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
2505 #endif
2506 }
2507
2508 sin.sin4.sin_port = htons(st.port);
2509 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
2510 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
2511 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
2512
2513 setNonBlocking(fd);
2514 setSocketSendBuffer(fd, 65000);
2515 listen(fd, 128);
2516 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
2517 tcpSockets.insert(fd);
2518
2519 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2520 // - fd is not that which we know here, but returned from accept()
2521 if(sin.sin4.sin_family == AF_INET)
2522 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
2523 else
2524 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2525 }
2526 }
2527
2528 static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
2529 {
2530 int one=1;
2531 vector<string>locals;
2532 stringtok(locals,::arg()["local-address"]," ,");
2533
2534 if(locals.empty())
2535 throw PDNSException("No local address specified");
2536
2537 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2538 ServiceTuple st;
2539 st.port=::arg().asNum("local-port");
2540 parseService(*i, st);
2541
2542 ComboAddress sin;
2543
2544 sin.reset();
2545 sin.sin4.sin_family = AF_INET;
2546 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2547 sin.sin6.sin6_family = AF_INET6;
2548 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2549 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2550 }
2551
2552 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2553 if(fd < 0) {
2554 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2555 }
2556 if (!setSocketTimestamps(fd))
2557 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2558
2559 if(IsAnyAddress(sin)) {
2560 if(sin.sin4.sin_family == AF_INET)
2561 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2562 g_fromtosockets.insert(fd);
2563 #ifdef IPV6_RECVPKTINFO
2564 if(sin.sin4.sin_family == AF_INET6)
2565 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2566 g_fromtosockets.insert(fd);
2567 #endif
2568 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2569 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2570 }
2571 }
2572 if( ::arg().mustDo("non-local-bind") )
2573 Utility::setBindAny(AF_INET6, fd);
2574
2575 setCloseOnExec(fd);
2576
2577 setSocketReceiveBuffer(fd, 250000);
2578 sin.sin4.sin_port = htons(st.port);
2579
2580
2581 #ifdef SO_REUSEPORT
2582 if(g_reusePort) {
2583 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2584 throw PDNSException("SO_REUSEPORT: "+stringerror());
2585 }
2586 #endif
2587
2588 if (sin.isIPv4()) {
2589 try {
2590 setSocketIgnorePMTU(fd);
2591 }
2592 catch(const std::exception& e) {
2593 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2594 }
2595 }
2596
2597 socklen_t socklen=sin.getSocklen();
2598 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2599 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2600
2601 setNonBlocking(fd);
2602
2603 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
2604 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2605 if(sin.sin4.sin_family == AF_INET)
2606 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2607 else
2608 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2609 }
2610 }
2611
2612 static void daemonize(void)
2613 {
2614 if(fork())
2615 exit(0); // bye bye
2616
2617 setsid();
2618
2619 int i=open("/dev/null",O_RDWR); /* open stdin */
2620 if(i < 0)
2621 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2622 else {
2623 dup2(i,0); /* stdin */
2624 dup2(i,1); /* stderr */
2625 dup2(i,2); /* stderr */
2626 close(i);
2627 }
2628 }
2629
2630 static void usr1Handler(int)
2631 {
2632 statsWanted=true;
2633 }
2634
2635 static void usr2Handler(int)
2636 {
2637 g_quiet= !g_quiet;
2638 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2639 ::arg().set("quiet")=g_quiet ? "" : "no";
2640 }
2641
2642 static void doStats(void)
2643 {
2644 static time_t lastOutputTime;
2645 static uint64_t lastQueryCount;
2646
2647 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2648 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2649
2650 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2651 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2652 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2653 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2654 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2655
2656 g_log<<Logger::Notice<<"stats: throttle map: "
2657 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2658 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2659 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2660 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2661 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2662 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2663 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2664
2665 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2666 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2667
2668 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2669 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2670
2671 size_t idx = 0;
2672 for (const auto& threadInfo : s_threadInfos) {
2673 if(threadInfo.isWorker) {
2674 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
2675 ++idx;
2676 }
2677 }
2678
2679 time_t now = time(0);
2680 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2681 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2682 }
2683 lastOutputTime = now;
2684 lastQueryCount = SyncRes::s_queries;
2685 }
2686 else if(statsWanted)
2687 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
2688
2689 statsWanted=false;
2690 }
2691
2692 static void houseKeeping(void *)
2693 {
2694 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
2695 static thread_local int cleanCounter=0;
2696 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2697 auto luaconfsLocal = g_luaconfs.getLocal();
2698
2699 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2700 // Loading the Lua config file already "refreshed" the TAs
2701 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2702 }
2703
2704 try {
2705 if(s_running) {
2706 return;
2707 }
2708 s_running=true;
2709
2710 struct timeval now;
2711 Utility::gettimeofday(&now, 0);
2712
2713 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2714 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2715 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2716
2717 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2718
2719 if(!((cleanCounter++)%40)) { // this is a full scan!
2720 time_t limit=now.tv_sec-300;
2721 SyncRes::pruneNSSpeeds(limit);
2722 }
2723 last_prune=time(0);
2724 }
2725
2726 if(now.tv_sec - last_rootupdate > 7200) {
2727 int res = SyncRes::getRootNS(g_now, nullptr);
2728 if (!res)
2729 last_rootupdate=now.tv_sec;
2730 }
2731
2732 if(isHandlerThread()) {
2733
2734 if(now.tv_sec - last_secpoll >= 3600) {
2735 try {
2736 doSecPoll(&last_secpoll);
2737 }
2738 catch(std::exception& e)
2739 {
2740 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2741 }
2742 catch(PDNSException& e)
2743 {
2744 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2745 }
2746 catch(ImmediateServFailException &e)
2747 {
2748 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2749 }
2750 catch(...)
2751 {
2752 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
2753 }
2754 }
2755
2756 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2757 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2758 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2759 try {
2760 map<DNSName, dsmap_t> dsAnchors;
2761 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2762 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2763 lci.dsAnchors = dsAnchors;
2764 });
2765 }
2766 last_trustAnchorUpdate = now.tv_sec;
2767 } catch (const PDNSException &pe) {
2768 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2769 }
2770 }
2771 }
2772 s_running=false;
2773 }
2774 catch(PDNSException& ae)
2775 {
2776 s_running=false;
2777 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2778 throw;
2779 }
2780 }
2781
2782 static void makeThreadPipes()
2783 {
2784 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
2785 if (pipeBufferSize > 0) {
2786 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
2787 }
2788
2789 /* thread 0 is the handler / SNMP, we start at 1 */
2790 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2791 auto& threadInfos = s_threadInfos.at(n);
2792
2793 int fd[2];
2794 if(pipe(fd) < 0)
2795 unixDie("Creating pipe for inter-thread communications");
2796
2797 threadInfos.pipes.readToThread = fd[0];
2798 threadInfos.pipes.writeToThread = fd[1];
2799
2800 if(pipe(fd) < 0)
2801 unixDie("Creating pipe for inter-thread communications");
2802
2803 threadInfos.pipes.readFromThread = fd[0];
2804 threadInfos.pipes.writeFromThread = fd[1];
2805
2806 if(pipe(fd) < 0)
2807 unixDie("Creating pipe for inter-thread communications");
2808
2809 threadInfos.pipes.readQueriesToThread = fd[0];
2810 threadInfos.pipes.writeQueriesToThread = fd[1];
2811
2812 if (pipeBufferSize > 0) {
2813 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
2814 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(errno)<<endl;
2815 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
2816 if (existingSize > 0) {
2817 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
2818 }
2819 }
2820 }
2821
2822 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
2823 unixDie("Making pipe for inter-thread communications non-blocking");
2824 }
2825 }
2826 }
2827
2828 struct ThreadMSG
2829 {
2830 pipefunc_t func;
2831 bool wantAnswer;
2832 };
2833
2834 void broadcastFunction(const pipefunc_t& func)
2835 {
2836 /* This function might be called by the worker with t_id 0 during startup
2837 for the initialization of ACLs and domain maps. After that it should only
2838 be called by the handler. */
2839
2840 if (s_threadInfos.empty() && isHandlerThread()) {
2841 /* the handler and distributors will call themselves below, but
2842 during startup we get called while s_threadInfos has not been
2843 populated yet to update the ACL or domain maps, so we need to
2844 handle that case.
2845 */
2846 func();
2847 }
2848
2849 unsigned int n = 0;
2850 for (const auto& threadInfo : s_threadInfos) {
2851 if(n++ == t_id) {
2852 func(); // don't write to ourselves!
2853 continue;
2854 }
2855
2856 ThreadMSG* tmsg = new ThreadMSG();
2857 tmsg->func = func;
2858 tmsg->wantAnswer = true;
2859 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2860 delete tmsg;
2861
2862 unixDie("write to thread pipe returned wrong size or error");
2863 }
2864
2865 string* resp = nullptr;
2866 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2867 unixDie("read from thread pipe returned wrong size or error");
2868
2869 if(resp) {
2870 delete resp;
2871 resp = nullptr;
2872 }
2873 }
2874 }
2875
2876 static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
2877 {
2878 auto& targetInfo = s_threadInfos[target];
2879 if(!targetInfo.isWorker) {
2880 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
2881 exit(1);
2882 }
2883
2884 const auto& tps = targetInfo.pipes;
2885
2886 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2887 if (written > 0) {
2888 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2889 delete tmsg;
2890 unixDie("write to thread pipe returned wrong size or error");
2891 }
2892 }
2893 else {
2894 int error = errno;
2895 if (error == EAGAIN || error == EWOULDBLOCK) {
2896 return false;
2897 } else {
2898 delete tmsg;
2899 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
2900 }
2901 }
2902
2903 ++targetInfo.numberOfDistributedQueries;
2904
2905 return true;
2906 }
2907
2908 static unsigned int getWorkerLoad(size_t workerIdx)
2909 {
2910 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
2911 if (mt != nullptr) {
2912 return mt->numProcesses();
2913 }
2914 return 0;
2915 }
2916
2917 static unsigned int selectWorker(unsigned int hash)
2918 {
2919 if (s_balancingFactor == 0) {
2920 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
2921 }
2922
2923 /* we start with one, representing the query we are currently handling */
2924 double currentLoad = 1;
2925 std::vector<unsigned int> load(g_numWorkerThreads);
2926 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
2927 load[idx] = getWorkerLoad(idx);
2928 currentLoad += load[idx];
2929 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
2930 }
2931
2932 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
2933 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
2934
2935 unsigned int worker = hash % g_numWorkerThreads;
2936 /* at least one server has to be at or below the average load */
2937 if (load[worker] > targetLoad) {
2938 ++g_stats.rebalancedQueries;
2939 do {
2940 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
2941 worker = (worker + 1) % g_numWorkerThreads;
2942 }
2943 while(load[worker] > targetLoad);
2944 }
2945
2946 return /* skip handler */ 1 + g_numDistributorThreads + worker;
2947 }
2948
2949 // This function is only called by the distributor threads, when pdns-distributes-queries is set
2950 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2951 {
2952 if (!isDistributorThread()) {
2953 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2954 exit(1);
2955 }
2956
2957 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2958 unsigned int target = selectWorker(hash);
2959
2960 ThreadMSG* tmsg = new ThreadMSG();
2961 tmsg->func = func;
2962 tmsg->wantAnswer = false;
2963
2964 if (!trySendingQueryToWorker(target, tmsg)) {
2965 /* if this function failed but did not raise an exception, it means that the pipe
2966 was full, let's try another one */
2967 unsigned int newTarget = 0;
2968 do {
2969 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
2970 } while (newTarget == target);
2971
2972 if (!trySendingQueryToWorker(newTarget, tmsg)) {
2973 g_stats.queryPipeFullDrops++;
2974 delete tmsg;
2975 }
2976 }
2977 }
2978
2979 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2980 {
2981 ThreadMSG* tmsg = nullptr;
2982
2983 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
2984 unixDie("read from thread pipe returned wrong size or error");
2985 }
2986
2987 void *resp=0;
2988 try {
2989 resp = tmsg->func();
2990 }
2991 catch(std::exception& e) {
2992 if(g_logCommonErrors)
2993 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2994 }
2995 catch(PDNSException& e) {
2996 if(g_logCommonErrors)
2997 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2998 }
2999 if(tmsg->wantAnswer) {
3000 const auto& threadInfo = s_threadInfos.at(t_id);
3001 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
3002 delete tmsg;
3003 unixDie("write to thread pipe returned wrong size or error");
3004 }
3005 }
3006
3007 delete tmsg;
3008 }
3009
3010 template<class T> void *voider(const boost::function<T*()>& func)
3011 {
3012 return func();
3013 }
3014
3015 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3016 {
3017 a.insert(a.end(), b.begin(), b.end());
3018 return a;
3019 }
3020
/* Appends every (string, number) entry of b to the end of a, and returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  for (const auto& entry : b) {
    a.push_back(entry);
  }
  return a;
}
3026
3027 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3028 {
3029 a.insert(a.end(), b.begin(), b.end());
3030 return a;
3031 }
3032
3033
3034 /*
3035 This function should only be called by the handler to gather metrics, wipe the cache,
3036 reload the Lua script (not the Lua config) or change the current trace regex,
3037 and by the SNMP thread to gather metrics. */
3038 template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3039 {
3040 if (!isHandlerThread()) {
3041 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
3042 exit(1);
3043 }
3044
3045 unsigned int n = 0;
3046 T ret=T();
3047 for (const auto& threadInfo : s_threadInfos) {
3048 if (n++ == t_id) {
3049 continue;
3050 }
3051
3052 const auto& tps = threadInfo.pipes;
3053 ThreadMSG* tmsg = new ThreadMSG();
3054 tmsg->func = boost::bind(voider<T>, func);
3055 tmsg->wantAnswer = true;
3056
3057 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3058 delete tmsg;
3059 unixDie("write to thread pipe returned wrong size or error");
3060 }
3061
3062 T* resp = nullptr;
3063 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3064 unixDie("read from thread pipe returned wrong size or error");
3065
3066 if(resp) {
3067 ret += *resp;
3068 delete resp;
3069 resp = nullptr;
3070 }
3071 }
3072 return ret;
3073 }
3074
3075 template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3076 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3077 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3078 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3079 template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3080
3081 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
3082 {
3083 try {
3084 string remote;
3085 string msg=s_rcc.recv(&remote);
3086 RecursorControlParser rcp;
3087 RecursorControlParser::func_t* command;
3088
3089 string answer=rcp.getAnswer(msg, &command);
3090
3091 // If we are inside a chroot, we need to strip
3092 if (!arg()["chroot"].empty()) {
3093 size_t len = arg()["chroot"].length();
3094 remote = remote.substr(len);
3095 }
3096
3097 s_rcc.send(answer, &remote);
3098 command();
3099 }
3100 catch(const std::exception& e) {
3101 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
3102 }
3103 catch(const PDNSException& ae) {
3104 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
3105 }
3106 }
3107
3108 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
3109 {
3110 PacketID* pident=any_cast<PacketID>(&var);
3111 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3112
3113 shared_array<char> buffer(new char[pident->inNeeded]);
3114
3115 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
3116 if(ret > 0) {
3117 pident->inMSG.append(&buffer[0], &buffer[ret]);
3118 pident->inNeeded-=(size_t)ret;
3119 if(!pident->inNeeded || pident->inIncompleteOkay) {
3120 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3121 PacketID pid=*pident;
3122 string msg=pident->inMSG;
3123
3124 t_fdm->removeReadFD(fd);
3125 MT->sendEvent(pid, &msg);
3126 }
3127 else {
3128 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3129 }
3130 }
3131 else {
3132 PacketID tmp=*pident;
3133 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
3134 string empty;
3135 MT->sendEvent(tmp, &empty); // this conveys error status
3136 }
3137 }
3138
3139 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
3140 {
3141 PacketID* pid=any_cast<PacketID>(&var);
3142 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
3143 if(ret > 0) {
3144 pid->outPos+=(ssize_t)ret;
3145 if(pid->outPos==pid->outMSG.size()) {
3146 PacketID tmp=*pid;
3147 t_fdm->removeWriteFD(fd);
3148 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3149 }
3150 }
3151 else { // error or EOF
3152 PacketID tmp(*pid);
3153 t_fdm->removeWriteFD(fd);
3154 string sent;
3155 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
3156 }
3157 }
3158
3159 // resend event to everybody chained onto it
3160 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
3161 {
3162 if(iter->key.chain.empty())
3163 return;
3164 // cerr<<"doResends called!\n";
3165 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3166 resend.fd=-1;
3167 resend.id=*i;
3168 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3169
3170 MT->sendEvent(resend, &content);
3171 g_stats.chainResends++;
3172 }
3173 }
3174
3175 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
3176 {
3177 PacketID pid=any_cast<PacketID>(var);
3178 ssize_t len;
3179 std::string packet;
3180 packet.resize(g_outgoingEDNSBufsize);
3181 ComboAddress fromaddr;
3182 socklen_t addrlen=sizeof(fromaddr);
3183
3184 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
3185
3186 if(len < (ssize_t) sizeof(dnsheader)) {
3187 if(len < 0)
3188 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
3189 else {
3190 g_stats.serverParseError++;
3191 if(g_logCommonErrors)
3192 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
3193 ": packet smaller than DNS header"<<endl;
3194 }
3195
3196 t_udpclientsocks->returnSocket(fd);
3197 string empty;
3198
3199 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3200 if(iter != MT->d_waiters.end())
3201 doResends(iter, pid, empty);
3202
3203 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
3204 return;
3205 }
3206
3207 packet.resize(len);
3208 dnsheader dh;
3209 memcpy(&dh, &packet.at(0), sizeof(dh));
3210
3211 PacketID pident;
3212 pident.remote=fromaddr;
3213 pident.id=dh.id;
3214 pident.fd=fd;
3215
3216 if(!dh.qr && g_logCommonErrors) {
3217 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
3218 }
3219
3220 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3221 !dh.qr) { // one weird server
3222 pident.domain.clear();
3223 pident.type = 0;
3224 }
3225 else {
3226 try {
3227 if(len > 12)
3228 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
3229 }
3230 catch(std::exception& e) {
3231 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
3232 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
3233 return;
3234 }
3235 }
3236
3237 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3238 if(iter != MT->d_waiters.end()) {
3239 doResends(iter, pident, packet);
3240 }
3241
3242 retryWithName:
3243
3244 if(!MT->sendEvent(pident, &packet)) {
3245 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3246 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3247 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
3248 pident.domain == mthread->key.domain) {
3249 mthread->key.nearMisses++;
3250 }
3251
3252 // be a bit paranoid here since we're weakening our matching
3253 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
3254 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3255 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3256 pident.domain = mthread->key.domain;
3257 pident.type = mthread->key.type;
3258 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
3259 }
3260 }
3261 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3262 if(g_logCommonErrors) {
3263 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
3264 }
3265 }
3266 else if(fd >= 0) {
3267 t_udpclientsocks->returnSocket(fd);
3268 }
3269 }
3270
3271 FDMultiplexer* getMultiplexer()
3272 {
3273 FDMultiplexer* ret;
3274 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
3275 try {
3276 ret=i.second();
3277 return ret;
3278 }
3279 catch(FDMultiplexerException &fe) {
3280 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
3281 }
3282 catch(...) {
3283 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
3284 }
3285 }
3286 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
3287 exit(1);
3288 }
3289
3290
3291 static string* doReloadLuaScript()
3292 {
3293 string fname= ::arg()["lua-dns-script"];
3294 try {
3295 if(fname.empty()) {
3296 t_pdl.reset();
3297 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
3298 return new string("unloaded\n");
3299 }
3300 else {
3301 t_pdl = std::make_shared<RecursorLua4>();
3302 t_pdl->loadFile(fname);
3303 }
3304 }
3305 catch(std::exception& e) {
3306 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
3307 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
3308 }
3309
3310 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
3311 return new string("(re)loaded '"+fname+"'\n");
3312 }
3313
3314 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3315 {
3316 if(begin != end)
3317 ::arg().set("lua-dns-script") = *begin;
3318
3319 return broadcastAccFunction<string>(doReloadLuaScript);
3320 }
3321
3322 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
3323 try
3324 {
3325 if(newRegex.empty()) {
3326 t_traceRegex.reset();
3327 return new string("unset\n");
3328 }
3329 else {
3330 t_traceRegex = std::make_shared<Regex>(newRegex);
3331 return new string("ok\n");
3332 }
3333 }
3334 catch(PDNSException& ae)
3335 {
3336 return new string(ae.reason+"\n");
3337 }
3338
3339 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3340 {
3341 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3342 }
3343
3344 static void checkLinuxIPv6Limits()
3345 {
3346 #ifdef __linux__
3347 string line;
3348 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
3349 int lim=std::stoi(line);
3350 if(lim < 16384) {
3351 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
3352 }
3353 }
3354 #endif
3355 }
3356 static void checkOrFixFDS()
3357 {
3358 unsigned int availFDs=getFilenumLimit();
3359 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3360
3361 if(wantFDs > availFDs) {
3362 unsigned int hardlimit= getFilenumLimit(true);
3363 if(hardlimit >= wantFDs) {
3364 setFilenumLimit(wantFDs);
3365 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
3366 }
3367 else {
3368 int newval = (hardlimit - 25) / g_numWorkerThreads;
3369 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
3370 g_maxMThreads = newval;
3371 setFilenumLimit(hardlimit);
3372 }
3373 }
3374 }
3375
3376 static void* recursorThread(unsigned int tid, const string& threadName);
3377
3378 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
3379 {
3380 t_allowFrom = ng;
3381 return nullptr;
3382 }
3383
3384 int g_argc;
3385 char** g_argv;
3386
3387 void parseACLs()
3388 {
3389 static bool l_initialized;
3390
3391 if(l_initialized) { // only reload configuration file on second call
3392 string configname=::arg()["config-dir"]+"/recursor.conf";
3393 if(::arg()["config-name"]!="") {
3394 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3395 }
3396 cleanSlashes(configname);
3397
3398 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
3399 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
3400 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
3401 ::arg().preParseFile(configname.c_str(), "include-dir");
3402 ::arg().preParse(g_argc, g_argv, "include-dir");
3403
3404 // then process includes
3405 std::vector<std::string> extraConfigs;
3406 ::arg().gatherIncludes(extraConfigs);
3407
3408 for(const std::string& fn : extraConfigs) {
3409 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3410 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3411 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3412 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3413 }
3414
3415 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3416 ::arg().preParse(g_argc, g_argv, "allow-from");
3417 }
3418
3419 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3420 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3421
3422 if(!::arg()["allow-from-file"].empty()) {
3423 string line;
3424 ifstream ifs(::arg()["allow-from-file"].c_str());
3425 if(!ifs) {
3426 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3427 }
3428
3429 string::size_type pos;
3430 while(getline(ifs,line)) {
3431 pos=line.find('#');
3432 if(pos!=string::npos)
3433 line.resize(pos);
3434 trim(line);
3435 if(line.empty())
3436 continue;
3437
3438 allowFrom->addMask(line);
3439 }
3440 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
3441 }
3442 else if(!::arg()["allow-from"].empty()) {
3443 vector<string> ips;
3444 stringtok(ips, ::arg()["allow-from"], ", ");
3445
3446 g_log<<Logger::Warning<<"Only allowing queries from: ";
3447 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3448 allowFrom->addMask(*i);
3449 if(i!=ips.begin())
3450 g_log<<Logger::Warning<<", ";
3451 g_log<<Logger::Warning<<*i;
3452 }
3453 g_log<<Logger::Warning<<endl;
3454 }
3455 else {
3456 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3457 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
3458 allowFrom = nullptr;
3459 }
3460
3461 g_initialAllowFrom = allowFrom;
3462 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
3463 oldAllowFrom = nullptr;
3464
3465 l_initialized = true;
3466 }
3467
3468
3469 static void setupDelegationOnly()
3470 {
3471 vector<string> parts;
3472 stringtok(parts, ::arg()["delegation-only"], ", \t");
3473 for(const auto& p : parts) {
3474 SyncRes::addDelegationOnly(DNSName(p));
3475 }
3476 }
3477
3478 static std::map<unsigned int, std::set<int> > parseCPUMap()
3479 {
3480 std::map<unsigned int, std::set<int> > result;
3481
3482 const std::string value = ::arg()["cpu-map"];
3483
3484 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
3485 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
3486 return result;
3487 }
3488
3489 std::vector<std::string> parts;
3490
3491 stringtok(parts, value, " \t");
3492
3493 for(const auto& part : parts) {
3494 if (part.find('=') == string::npos)
3495 continue;
3496
3497 try {
3498 auto headers = splitField(part, '=');
3499 trim(headers.first);
3500 trim(headers.second);
3501
3502 unsigned int threadId = pdns_stou(headers.first);
3503 std::vector<std::string> cpus;
3504
3505 stringtok(cpus, headers.second, ",");
3506
3507 for(const auto& cpu : cpus) {
3508 int cpuId = std::stoi(cpu);
3509
3510 result[threadId].insert(cpuId);
3511 }
3512 }
3513 catch(const std::exception& e) {
3514 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
3515 }
3516 }
3517
3518 return result;
3519 }
3520
3521 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3522 {
3523 const auto& cpuMapping = cpusMap.find(n);
3524 if (cpuMapping != cpusMap.cend()) {
3525 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3526 if (rc == 0) {
3527 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
3528 for (const auto cpu : cpuMapping->second) {
3529 g_log<<Logger::Info<<" "<<cpu;
3530 }
3531 g_log<<Logger::Info<<endl;
3532 }
3533 else {
3534 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
3535 for (const auto cpu : cpuMapping->second) {
3536 g_log<<Logger::Info<<" "<<cpu;
3537 }
3538 g_log<<Logger::Info<<strerror(rc)<<endl;
3539 }
3540 }
3541 }
3542
3543 #ifdef NOD_ENABLED
3544 static void setupNODThread()
3545 {
3546 if (g_nodEnabled) {
3547 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3548 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
3549 try {
3550 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3551 }
3552 catch (const PDNSException& e) {
3553 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3554 _exit(1);
3555 }
3556 if (!t_nodDBp->init()) {
3557 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3558 _exit(1);
3559 }
3560 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
3561 t.detach();
3562 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
3563 }
3564 if (g_udrEnabled) {
3565 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3566 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
3567 try {
3568 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3569 }
3570 catch (const PDNSException& e) {
3571 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3572 _exit(1);
3573 }
3574 if (!t_udrDBp->init()) {
3575 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3576 _exit(1);
3577 }
3578 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
3579 t.detach();
3580 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
3581 }
3582 }
3583
3584 void parseNODWhitelist(const std::string& wlist)
3585 {
3586 vector<string> parts;
3587 stringtok(parts, wlist, ",; ");
3588 for(const auto& a : parts) {
3589 g_nodDomainWL.add(DNSName(a));
3590 }
3591 }
3592
3593 static void setupNODGlobal()
3594 {
3595 // Setup NOD subsystem
3596 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3597 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3598 g_nodLog = ::arg().mustDo("new-domain-log");
3599 parseNODWhitelist(::arg()["new-domain-whitelist"]);
3600
3601 // Setup Unique DNS Response subsystem
3602 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3603 g_udrLog = ::arg().mustDo("unique-response-log");
3604 }
3605 #endif /* NOD_ENABLED */
3606
3607 static int serviceMain(int argc, char*argv[])
3608 {
3609 g_log.setName(s_programname);
3610 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3611 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
3612
3613 if(!::arg()["logging-facility"].empty()) {
3614 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3615 if(val >= 0)
3616 g_log.setFacility(val);
3617 else
3618 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
3619 }
3620
3621 showProductVersion();
3622
3623 g_disthashseed=dns_random(0xffffffff);
3624
3625 checkLinuxIPv6Limits();
3626 try {
3627 vector<string> addrs;
3628 if(!::arg()["query-local-address6"].empty()) {
3629 SyncRes::s_doIPv6=true;
3630 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3631
3632 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3633 for(const string& addr : addrs) {
3634 g_localQueryAddresses6.push_back(ComboAddress(addr));
3635 }
3636 }
3637 else {
3638 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
3639 }
3640 addrs.clear();
3641 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3642 for(const string& addr : addrs) {
3643 g_localQueryAddresses4.push_back(ComboAddress(addr));
3644 }
3645 }
3646 catch(std::exception& e) {
3647 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
3648 exit(99);
3649 }
3650
3651 // keep this ABOVE loadRecursorLuaConfig!
3652 if(::arg()["dnssec"]=="off")
3653 g_dnssecmode=DNSSECMode::Off;
3654 else if(::arg()["dnssec"]=="process-no-validate")
3655 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3656 else if(::arg()["dnssec"]=="process")
3657 g_dnssecmode=DNSSECMode::Process;
3658 else if(::arg()["dnssec"]=="validate")
3659 g_dnssecmode=DNSSECMode::ValidateAll;
3660 else if(::arg()["dnssec"]=="log-fail")
3661 g_dnssecmode=DNSSECMode::ValidateForLog;
3662 else {
3663 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
3664 exit(1);
3665 }
3666
3667 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3668 if (g_signatureInceptionSkew < 0) {
3669 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3670 exit(1);
3671 }
3672
3673 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
3674 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
3675
3676 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3677 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
3678
3679 luaConfigDelayedThreads delayedLuaThreads;
3680 try {
3681 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
3682 }
3683 catch (PDNSException &e) {
3684 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
3685 exit(1);
3686 }
3687
3688 parseACLs();
3689 initPublicSuffixList(::arg()["public-suffix-list-file"]);
3690
3691 if(!::arg()["dont-query"].empty()) {
3692 vector<string> ips;
3693 stringtok(ips, ::arg()["dont-query"], ", ");
3694 ips.push_back("0.0.0.0");
3695 ips.push_back("::");
3696
3697 g_log<<Logger::Warning<<"Will not send queries to: ";
3698 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3699 SyncRes::addDontQuery(*i);
3700 if(i!=ips.begin())
3701 g_log<<Logger::Warning<<", ";
3702 g_log<<Logger::Warning<<*i;
3703 }
3704 g_log<<Logger::Warning<<endl;
3705 }
3706
3707 g_quiet=::arg().mustDo("quiet");
3708
3709 /* this needs to be done before parseACLs(), which call broadcastFunction() */
3710 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3711 if(g_weDistributeQueries) {
3712 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
3713 }
3714
3715 setupDelegationOnly();
3716 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
3717
3718 if(::arg()["trace"]=="fail") {
3719 SyncRes::setDefaultLogMode(SyncRes::Store);
3720 }
3721 else if(::arg().mustDo("trace")) {
3722 SyncRes::setDefaultLogMode(SyncRes::Log);
3723 ::arg().set("quiet")="no";
3724 g_quiet=false;
3725 g_dnssecLOG=true;
3726 }
3727 string myHostname = getHostname();
3728 if (myHostname == "UNKNOWN"){
3729 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3730 myHostname = "";
3731 }
3732
3733 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3734 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
3735
3736 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3737
3738 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
3739 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
3740 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
3741 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
3742 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3743 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3744 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
3745 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3746 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
3747 SyncRes::s_serverID=::arg()["server-id"];
3748 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3749 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3750 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3751 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3752 if(SyncRes::s_serverID.empty()) {
3753 SyncRes::s_serverID = myHostname;
3754 }
3755
3756 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3757 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3758 SyncRes::clearECSStats();
3759 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3760 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
3761 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
3762
3763 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3764 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3765 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3766 }
3767 else {
3768 bool found = false;
3769 for (const auto& addr : g_localQueryAddresses4) {
3770 if (!IsAnyAddress(addr)) {
3771 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3772 found = true;
3773 break;
3774 }
3775 }
3776 if (!found) {
3777 for (const auto& addr : g_localQueryAddresses6) {
3778 if (!IsAnyAddress(addr)) {
3779 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3780 found = true;
3781 break;
3782 }
3783 }
3784 if (!found) {
3785 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3786 }
3787 }
3788 }
3789
3790 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3791 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3792 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3793
3794 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
3795 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
3796
3797 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3798
3799 g_initialDomainMap = parseAuthAndForwards();
3800
3801 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3802
3803 g_logCommonErrors=::arg().mustDo("log-common-errors");
3804 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3805
3806 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3807 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3808
3809 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3810
3811 g_numDistributorThreads = ::arg().asNum("distributor-threads");
3812 g_numWorkerThreads = ::arg().asNum("threads");
3813 if (g_numWorkerThreads < 1) {
3814 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3815 g_numWorkerThreads = 1;
3816 }
3817
3818 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
3819 g_maxMThreads = ::arg().asNum("max-mthreads");
3820
3821 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3822
3823 g_statisticsInterval = ::arg().asNum("statistics-interval");
3824
3825 {
3826 SuffixMatchNode dontThrottleNames;
3827 vector<string> parts;
3828 stringtok(parts, ::arg()["dont-throttle-names"]);
3829 for (const auto &p : parts) {
3830 dontThrottleNames.add(DNSName(p));
3831 }
3832 g_dontThrottleNames.setState(dontThrottleNames);
3833
3834 NetmaskGroup dontThrottleNetmasks;
3835 stringtok(parts, ::arg()["dont-throttle-netmasks"]);
3836 for (const auto &p : parts) {
3837 dontThrottleNetmasks.addMask(Netmask(p));
3838 }
3839 g_dontThrottleNetmasks.setState(dontThrottleNetmasks);
3840 }
3841
3842 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
3843 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
3844 s_balancingFactor = 0.0;
3845 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
3846 }
3847
3848 #ifdef SO_REUSEPORT
3849 g_reusePort = ::arg().mustDo("reuseport");
3850 #endif
3851
3852 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
3853
3854 if (g_reusePort) {
3855 if (g_weDistributeQueries) {
3856 /* first thread is the handler, then distributors */
3857 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3858 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3859 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3860 makeUDPServerSockets(deferredAdds);
3861 makeTCPServerSockets(deferredAdds, tcpSockets);
3862 }
3863 }
3864 else {
3865 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3866 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3867 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3868 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3869 makeUDPServerSockets(deferredAdds);
3870 makeTCPServerSockets(deferredAdds, tcpSockets);
3871 }
3872 }
3873 }
3874 else {
3875 std::set<int> tcpSockets;
3876 /* we don't have reuseport so we can only open one socket per
3877 listening addr:port and everyone will listen on it */
3878 makeUDPServerSockets(g_deferredAdds);
3879 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3880
3881 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3882 needs to listen to the shared sockets */
3883 if (g_weDistributeQueries) {
3884 /* first thread is the handler, then distributors */
3885 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3886 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3887 }
3888 }
3889 else {
3890 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3891 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3892 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3893 }
3894 }
3895 }
3896
3897 #ifdef NOD_ENABLED
3898 // Setup newly observed domain globals
3899 setupNODGlobal();
3900 #endif /* NOD_ENABLED */
3901
3902 int forks;
3903 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3904 if(!fork()) // we are child
3905 break;
3906 }
3907
3908 if(::arg().mustDo("daemon")) {
3909 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3910 g_log.toConsole(Logger::Critical);
3911 daemonize();
3912 }
3913 signal(SIGUSR1,usr1Handler);
3914 signal(SIGUSR2,usr2Handler);
3915 signal(SIGPIPE,SIG_IGN);
3916
3917 checkOrFixFDS();
3918
3919 #ifdef HAVE_LIBSODIUM
3920 if (sodium_init() == -1) {
3921 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3922 exit(99);
3923 }
3924 #endif
3925
3926 openssl_thread_setup();
3927 openssl_seed();
3928 /* setup rng before chroot */
3929 dns_random_init();
3930
3931 if(::arg()["server-id"].empty()) {
3932 ::arg().set("server-id") = myHostname;
3933 }
3934
3935 int newgid=0;
3936 if(!::arg()["setgid"].empty())
3937 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3938 int newuid=0;
3939 if(!::arg()["setuid"].empty())
3940 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3941
3942 Utility::dropGroupPrivs(newuid, newgid);
3943
3944 if (!::arg()["chroot"].empty()) {
3945 #ifdef HAVE_SYSTEMD
3946 char *ns;
3947 ns = getenv("NOTIFY_SOCKET");
3948 if (ns != nullptr) {
3949 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3950 exit(1);
3951 }
3952 #endif
3953 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3954 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3955 exit(1);
3956 }
3957 else
3958 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3959 }
3960
3961 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3962 if(!s_pidfname.empty())
3963 unlink(s_pidfname.c_str()); // remove possible old pid file
3964 writePid();
3965
3966 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3967
3968 Utility::dropUserPrivs(newuid);
3969 try {
3970 /* we might still have capabilities remaining, for example if we have been started as root
3971 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
3972 like CAP_NET_BIND_SERVICE.
3973 */
3974 dropCapabilities();
3975 }
3976 catch(const std::exception& e) {
3977 g_log<<Logger::Warning<<e.what()<<endl;
3978 }
3979
3980 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3981
3982 makeThreadPipes();
3983
3984 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3985 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3986 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3987 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
3988
3989 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
3990
3991 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
3992 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
3993 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
3994 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
3995
3996 if (::arg().mustDo("snmp-agent")) {
3997 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3998 g_snmpAgent->run();
3999 }
4000
4001 int port = ::arg().asNum("udp-source-port-min");
4002 if(port < 1024 || port > 65535){
4003 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
4004 exit(99); // this isn't going to fix itself either
4005 }
4006 s_minUdpSourcePort = port;
4007 port = ::arg().asNum("udp-source-port-max");
4008 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
4009 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
4010 exit(99); // this isn't going to fix itself either
4011 }
4012 s_maxUdpSourcePort = port;
4013 std::vector<string> parts {};
4014 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
4015 for (const auto &part : parts)
4016 {
4017 port = std::stoi(part);
4018 if(port < 1024 || port > 65535){
4019 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
4020 exit(99); // this isn't going to fix itself either
4021 }
4022 s_avoidUdpSourcePorts.insert(port);
4023 }
4024
4025 unsigned int currentThreadId = 1;
4026 const auto cpusMap = parseCPUMap();
4027
4028 if(g_numThreads == 1) {
4029 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
4030 #ifdef HAVE_SYSTEMD
4031 sd_notify(0, "READY=1");
4032 #endif
4033
4034 /* This thread handles the web server, carbon, statistics and the control channel */
4035 auto& handlerInfos = s_threadInfos.at(0);
4036 handlerInfos.isHandler = true;
4037 handlerInfos.thread = std::thread(recursorThread, 0, "main");
4038
4039 setCPUMap(cpusMap, currentThreadId, pthread_self());
4040
4041 auto& infos = s_threadInfos.at(currentThreadId);
4042 infos.isListener = true;
4043 infos.isWorker = true;
4044 recursorThread(currentThreadId++, "worker");
4045 }
4046 else {
4047
4048 if (g_weDistributeQueries) {
4049 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4050 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4051 auto& infos = s_threadInfos.at(currentThreadId);
4052 infos.isListener = true;
4053 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
4054
4055 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4056 }
4057 }
4058
4059 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4060
4061 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4062 auto& infos = s_threadInfos.at(currentThreadId);
4063 infos.isListener = g_weDistributeQueries ? false : true;
4064 infos.isWorker = true;
4065 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
4066
4067 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4068 }
4069
4070 #ifdef HAVE_SYSTEMD
4071 sd_notify(0, "READY=1");
4072 #endif
4073
4074 /* This thread handles the web server, carbon, statistics and the control channel */
4075 auto& infos = s_threadInfos.at(0);
4076 infos.isHandler = true;
4077 infos.thread = std::thread(recursorThread, 0, "web+stat");
4078
4079 s_threadInfos.at(0).thread.join();
4080 }
4081 return 0;
4082 }
4083
4084 static void* recursorThread(unsigned int n, const string& threadName)
4085 try
4086 {
4087 t_id=n;
4088 auto& threadInfo = s_threadInfos.at(t_id);
4089
4090 static string threadPrefix = "pdns-r/";
4091 setThreadName(threadPrefix + threadName);
4092
4093 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
4094 SyncRes::setDomainMap(g_initialDomainMap);
4095 t_allowFrom = g_initialAllowFrom;
4096 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4097 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
4098 primeHints();
4099
4100 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
4101
4102 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
4103
4104 #ifdef NOD_ENABLED
4105 if (threadInfo.isWorker)
4106 setupNODThread();
4107 #endif /* NOD_ENABLED */
4108
4109 /* the listener threads handle TCP queries */
4110 if(threadInfo.isWorker || threadInfo.isListener) {
4111 try {
4112 if(!::arg()["lua-dns-script"].empty()) {
4113 t_pdl = std::make_shared<RecursorLua4>();
4114 t_pdl->loadFile(::arg()["lua-dns-script"]);
4115 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4116 }
4117 }
4118 catch(std::exception &e) {
4119 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4120 _exit(99);
4121 }
4122 }
4123
4124 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
4125 if(ringsize) {
4126 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4127 if(g_weDistributeQueries)
4128 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
4129 else
4130 t_remotes->set_capacity(ringsize);
4131 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4132 t_servfailremotes->set_capacity(ringsize);
4133 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4134 t_bogusremotes->set_capacity(ringsize);
4135 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4136 t_largeanswerremotes->set_capacity(ringsize);
4137 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4138 t_timeouts->set_capacity(ringsize);
4139
4140 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4141 t_queryring->set_capacity(ringsize);
4142 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4143 t_servfailqueryring->set_capacity(ringsize);
4144 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4145 t_bogusqueryring->set_capacity(ringsize);
4146 }
4147
4148 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
4149 threadInfo.mt = MT.get();
4150
4151 #ifdef HAVE_PROTOBUF
4152 /* start protobuf export threads if needed */
4153 auto luaconfsLocal = g_luaconfs.getLocal();
4154 checkProtobufExport(luaconfsLocal);
4155 checkOutgoingProtobufExport(luaconfsLocal);
4156 #endif /* HAVE_PROTOBUF */
4157
4158 PacketID pident;
4159
4160 t_fdm=getMultiplexer();
4161
4162 if(threadInfo.isHandler) {
4163 if(::arg().mustDo("webserver")) {
4164 g_log<<Logger::Warning << "Enabling web server" << endl;
4165 try {
4166 new RecursorWebServer(t_fdm);
4167 }
4168 catch(PDNSException &e) {
4169 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
4170 exit(99);
4171 }
4172 }
4173 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
4174 }
4175 else {
4176
4177 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4178 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4179
4180 if (threadInfo.isListener) {
4181 if (g_reusePort) {
4182 /* then every listener has its own FDs */
4183 for(const auto deferred : threadInfo.deferredAdds) {
4184 t_fdm->addReadFD(deferred.first, deferred.second);
4185 }
4186 }
4187 else {
4188 /* otherwise all listeners are listening on the same ones */
4189 for(const auto deferred : g_deferredAdds) {
4190 t_fdm->addReadFD(deferred.first, deferred.second);
4191 }
4192 }
4193 }
4194 }
4195
4196 registerAllStats();
4197
4198 if(threadInfo.isHandler) {
4199 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4200 }
4201
4202 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4203
4204 bool listenOnTCP(true);
4205
4206 time_t last_stat = 0;
4207 time_t last_carbon=0, last_lua_maintenance=0;
4208 time_t carbonInterval=::arg().asNum("carbon-interval");
4209 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
4210 counter.store(0); // used to periodically execute certain tasks
4211 for(;;) {
4212 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
4213
4214 if(!(counter%500)) {
4215 MT->makeThread(houseKeeping, 0);
4216 }
4217
4218 if(!(counter%55)) {
4219 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
4220 expired_t expired=t_fdm->getTimeouts(g_now);
4221
4222 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
4223 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4224 if(g_logCommonErrors)
4225 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4226 t_fdm->removeReadFD(i->first);
4227 }
4228 }
4229
4230 counter++;
4231
4232 if(threadInfo.isHandler) {
4233 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4234 doStats();
4235 last_stat = g_now.tv_sec;
4236 }
4237
4238 Utility::gettimeofday(&g_now, 0);
4239
4240 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4241 MT->makeThread(doCarbonDump, 0);
4242 last_carbon = g_now.tv_sec;
4243 }
4244 }
4245 if (t_pdl != nullptr) {
4246 // lua-dns-script directive is present, call the maintenance callback if needed
4247 /* remember that the listener threads handle TCP queries */
4248 if (threadInfo.isWorker || threadInfo.isListener) {
4249 // Only on threads processing queries
4250 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4251 t_pdl->maintenance();
4252 last_lua_maintenance = g_now.tv_sec;
4253 }
4254 }
4255 }
4256
4257 t_fdm->run(&g_now);
4258 // 'run' updates g_now for us
4259
4260 if(threadInfo.isListener) {
4261 if(listenOnTCP) {
4262 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4263 for(const auto fd : threadInfo.tcpSockets) {
4264 t_fdm->removeReadFD(fd);
4265 }
4266 listenOnTCP=false;
4267 }
4268 }
4269 else {
4270 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4271 for(const auto fd : threadInfo.tcpSockets) {
4272 t_fdm->addReadFD(fd, handleNewTCPQuestion);
4273 }
4274 listenOnTCP=true;
4275 }
4276 }
4277 }
4278 }
4279 }
4280 catch(PDNSException &ae) {
4281 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4282 return 0;
4283 }
4284 catch(std::exception &e) {
4285 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4286 return 0;
4287 }
4288 catch(...) {
4289 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4290 return 0;
4291 }
4292
4293
4294 int main(int argc, char **argv)
4295 {
4296 g_argc = argc;
4297 g_argv = argv;
4298 g_stats.startupTime=time(0);
4299 Utility::srandom();
4300 versionSetProduct(ProductRecursor);
4301 reportBasicTypes();
4302 reportOtherTypes();
4303
4304 int ret = EXIT_SUCCESS;
4305
4306 try {
4307 ::arg().set("stack-size","stack size per mthread")="200000";
4308 ::arg().set("soa-minimum-ttl","Don't change")="0";
4309 ::arg().set("no-shuffle","Don't change")="off";
4310 ::arg().set("local-port","port to listen on")="53";
4311 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4312 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4313 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4314 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4315 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4316 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4317 ::arg().set("daemon","Operate as a daemon")="no";
4318 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4319 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4320 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4321 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4322 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4323 ::arg().set("chroot","switch to chroot jail")="";
4324 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4325 ::arg().set("setuid","If set, change user id to this uid for more security")="";
4326 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4327 ::arg().set("threads", "Launch this number of threads")="2";
4328 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4329 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4330 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4331 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4332 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4333 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4334 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4335 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4336 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4337 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4338 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
4339 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4340 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4341 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4342 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4343 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4344
4345 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4346 ::arg().set("quiet","Suppress logging of questions and answers")="";
4347 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4348 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
4349 ::arg().set("socket-owner","Owner of socket")="";
4350 ::arg().set("socket-group","Group of socket")="";
4351 ::arg().set("socket-mode", "Permissions for socket")="";
4352
4353 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
4354 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4355 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4356 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4357 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4358 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4359 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4360 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4361 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4362 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4363 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
4364 ::arg().set("hint-file", "If set, load root hints from this file")="";
4365 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4366 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4367 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4368 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4369 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4370 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4371 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4372 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4373 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4374 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4375 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
4376 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4377 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4378 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4379 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4380 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4381 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4382 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4383 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4384 ::arg().set("lua-config-file", "More powerful configuration options")="";
4385
4386 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4387 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4388 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4389 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4390 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
4391 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
4392 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4393 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
4394 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
4395 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
4396 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
4397 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4398 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
4399 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
4400 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
4401 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
4402 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
4403 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
4404 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
4405 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
4406 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
4407 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4408 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
4409 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
4410 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
4411 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
4412 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4413 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
4414 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
4415 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
4416 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
4417 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
4418 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
4419 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
4420 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
4421
4422 ::arg().set("include-dir","Include *.conf files from this directory")="";
4423 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
4424
4425 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
4426
4427 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
4428 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
4429
4430 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4431 for (size_t idx = 0; idx < 32; idx++) {
4432 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4433 }
4434 for (size_t idx = 0; idx < 128; idx++) {
4435 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4436 }
4437 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4438 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4439 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4440 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
4441
4442 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
4443 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
4444
4445 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4446
4447 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4448
4449 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
4450 ::arg().set("xpf-rr-code","XPF option code to use")="0";
4451
4452 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
4453 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4454 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
4455 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
4456 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
4457 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
4458 #ifdef NOD_ENABLED
4459 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4460 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4461 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4462 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4463 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4464 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
4465 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
4466 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4467 ::arg().set("unique-response-log", "Log unique responses")="yes";
4468 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
4469 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
4470 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
4471 #endif /* NOD_ENABLED */
4472 ::arg().setCmd("help","Provide a helpful message");
4473 ::arg().setCmd("version","Print version string");
4474 ::arg().setCmd("config","Output blank configuration");
4475 g_log.toConsole(Logger::Info);
4476 ::arg().laxParse(argc,argv); // do a lax parse
4477
4478 string configname=::arg()["config-dir"]+"/recursor.conf";
4479 if(::arg()["config-name"]!="") {
4480 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4481 s_programname+="-"+::arg()["config-name"];
4482 }
4483 cleanSlashes(configname);
4484
4485 if(!::arg().getCommands().empty()) {
4486 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4487 exit(99);
4488 }
4489
4490 if(::arg().mustDo("config")) {
4491 cout<<::arg().configstring()<<endl;
4492 exit(0);
4493 }
4494
4495 if(!::arg().file(configname.c_str()))
4496 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
4497
4498 ::arg().parse(argc,argv);
4499
4500 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4501 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
4502 exit(EXIT_FAILURE);
4503 }
4504
4505 if (::arg()["socket-dir"].empty()) {
4506 if (::arg()["chroot"].empty())
4507 ::arg().set("socket-dir") = LOCALSTATEDIR;
4508 else
4509 ::arg().set("socket-dir") = "/";
4510 }
4511
4512 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
4513
4514 if(::arg().asNum("threads")==1) {
4515 if (::arg().mustDo("pdns-distributes-queries")) {
4516 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4517 ::arg().set("pdns-distributes-queries")="no";
4518 }
4519 }
4520
4521 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4522 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4523 ::arg().set("distributor-threads")="1";
4524 }
4525
4526 if (!::arg().mustDo("pdns-distributes-queries")) {
4527 ::arg().set("distributor-threads")="0";
4528 }
4529
4530 if(::arg().mustDo("help")) {
4531 cout<<"syntax:"<<endl<<endl;
4532 cout<<::arg().helpstring(::arg()["help"])<<endl;
4533 exit(0);
4534 }
4535 if(::arg().mustDo("version")) {
4536 showProductVersion();
4537 showBuildConfiguration();
4538 exit(0);
4539 }
4540
4541 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
4542
4543 if (logUrgency < Logger::Error)
4544 logUrgency = Logger::Error;
4545 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4546 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4547 }
4548 g_log.setLoglevel(logUrgency);
4549 g_log.toConsole(logUrgency);
4550
4551 serviceMain(argc, argv);
4552 }
4553 catch(PDNSException &ae) {
4554 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4555 ret=EXIT_FAILURE;
4556 }
4557 catch(std::exception &e) {
4558 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4559 ret=EXIT_FAILURE;
4560 }
4561 catch(...) {
4562 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4563 ret=EXIT_FAILURE;
4564 }
4565
4566 return ret;
4567 }