]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
rec: Add statistics about ECS response sizes
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
31 #endif
32 #include "ws-recursor.hh"
33 #include <thread>
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
36 #include "utility.hh"
37 #include "dns_random.hh"
38 #ifdef HAVE_LIBSODIUM
39 #include <sodium.h>
40 #endif
41 #include "opensslsigners.hh"
42 #include <iostream>
43 #include <errno.h>
44 #include <boost/static_assert.hpp>
45 #include <map>
46 #include <set>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
49 #include <stdio.h>
50 #include <signal.h>
51 #include <stdlib.h>
52 #include "misc.hh"
53 #include "mtasker.hh"
54 #include <utility>
55 #include "arguments.hh"
56 #include "syncres.hh"
57 #include <fcntl.h>
58 #include <fstream>
59 #include "sortlist.hh"
60 #include "sstuff.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
66 #ifdef MALLOC_TRACE
67 #include "malloctrace.hh"
68 #endif
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
76 #include "logger.hh"
77 #include "iputils.hh"
78 #include "mplexer.hh"
79 #include "config.h"
80 #include "lua-recursor4.hh"
81 #include "version.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
84 #include "dnsname.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
90 #include "gettime.hh"
91 #include "pubsuffix.hh"
92 #ifdef NOD_ENABLED
93 #include "nod.hh"
94 #endif /* NOD_ENABLED */
95
96 #include "rec-protobuf.hh"
97 #include "rec-snmp.hh"
98
99 #ifdef HAVE_SYSTEMD
100 #include <systemd/sd-daemon.h>
101 #endif
102
103 #include "namespaces.hh"
104
105 #ifdef HAVE_PROTOBUF
106 #include "uuid-utils.hh"
107 #endif
108
109 #include "xpf.hh"
110
// Number of open TCP connections per client address (maintained by the
// TCPConnection constructor and destructor). Keys are ordered with
// ComboAddress::addressOnlyLessThan, presumably ignoring the port.
111 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
113 static thread_local std::shared_ptr<RecursorLua4> t_pdl;
// index of the current thread, exposed through getRecursorThreadId()
114 static thread_local unsigned int t_id = 0;
115 static thread_local std::shared_ptr<Regex> t_traceRegex;
116 static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
117 #ifdef HAVE_PROTOBUF
// Per-thread protobuf loggers plus the Lua configuration generation they were
// started from; see checkProtobufExport() / checkOutgoingProtobufExport().
118 static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
119 static thread_local uint64_t t_protobufServersGeneration;
120 static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
121 static thread_local uint64_t t_outgoingProtobufServersGeneration;
122 #endif /* HAVE_PROTOBUF */
123
124 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
125 thread_local std::unique_ptr<MemRecursorCache> t_RC;
126 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
// multiplexer on which the asend*/arecv* helpers register their descriptors
127 thread_local FDMultiplexer* t_fdm{nullptr};
// optional ring buffers of recent remotes / queries, fed by updateResponseStats() and startDoResolve()
128 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
129 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
130 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
131 #ifdef NOD_ENABLED
// newly-observed-domain and unique-response databases, consulted by the nod*/udr* helpers
132 thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
133 thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
134 #endif /* NOD_ENABLED */
135 __thread struct timeval g_now; // timestamp, updated (too) frequently
136
// list of (file descriptor, callback) pairs; presumably registered with the multiplexer later on
137 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
138
139 // for communicating with our threads
140 // effectively readonly after startup
141 struct RecThreadInfo
142 {
143 struct ThreadPipeSet
144 {
145 int writeToThread{-1};
146 int readToThread{-1};
147 int writeFromThread{-1};
148 int readFromThread{-1};
149 int writeQueriesToThread{-1}; // this one is non-blocking
150 int readQueriesToThread{-1};
151 };
152
153 /* FD corresponding to TCP sockets this thread is listening
154 on.
155 These FDs are also in deferredAdds when we have one
156 socket per listener, and in g_deferredAdds instead. */
157 std::set<int> tcpSockets;
158 /* FD corresponding to listening sockets if we have one socket per
159 listener (with reuseport), otherwise all listeners share the
160 same FD and g_deferredAdds is then used instead */
161 deferredAdd_t deferredAdds;
162 struct ThreadPipeSet pipes;
163 std::thread thread;
164 /* handle the web server, carbon, statistics and the control channel */
165 bool isHandler{false};
166 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
167 bool isListener{false};
168 /* process queries */
169 bool isWorker{false};
170 };
171
172 /* first we have the handler thread, t_id == 0 (some other
173 helper threads like SNMP might have t_id == 0 as well)
174 then the distributor threads if any
175 and finally the workers */
// indexed by t_id, see isDistributorThread() / isHandlerThread()
176 static std::vector<RecThreadInfo> s_threadInfos;
177 /* without reuseport, all listeners share the same sockets */
178 static deferredAdd_t g_deferredAdds;
179
180 typedef vector<int> tcpListenSockets_t;
181 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
182
// wildcard addresses, used by getQueryLocalAddress() when no local query address is configured
183 static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
184 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
185 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
// candidate source addresses for outgoing queries; getQueryLocalAddress() picks one at random
186 static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
187 static AtomicCounter counter;
188 static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
189 static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
190 static NetmaskGroup g_XPFAcl;
191 static size_t g_tcpMaxQueriesPerConn;
192 static size_t s_maxUDPQueriesPerRound;
193 static uint64_t g_latencyStatSize;
194 static uint32_t g_disthashseed;
195 static unsigned int g_maxTCPPerClient;
196 static unsigned int g_maxMThreads;
197 static unsigned int g_numDistributorThreads;
198 static unsigned int g_numWorkerThreads;
199 static int g_tcpTimeout;
// upper bound applied to the size of UDP answers in startDoResolve()
200 static uint16_t g_udpTruncationThreshold;
201 static uint16_t g_xpfRRCode{0};
202 static std::atomic<bool> statsWanted;
203 static std::atomic<bool> g_quiet;
204 static bool g_logCommonErrors;
205 static bool g_anyToTcp;
206 static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
207 static bool g_reusePort{false};
208 static bool g_gettagNeedsEDNSOptions{false};
209 static time_t g_statisticsInterval;
210 static bool g_useIncomingECS;
211 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
212 #ifdef NOD_ENABLED
// settings consulted by nodCheckNewDomain(), nodAddDomain() and udrCheckUniqueDNSRecord()
213 static bool g_nodEnabled;
214 static DNSName g_nodLookupDomain;
215 static bool g_nodLog;
216 static SuffixMatchNode g_nodDomainWL;
217 static std::string g_nod_pbtag;
218 static bool g_udrEnabled;
219 static bool g_udrLog;
220 static std::string g_udr_pbtag;
221 #endif /* NOD_ENABLED */
// source ports that UDPClientSocks::makeClientSocket() never picks for outgoing queries
222 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
223 static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
224 #else
225 static std::set<uint16_t> s_avoidUdpSourcePorts;
226 #endif
// bounds of the range makeClientSocket() draws random source ports from (presumably inclusive)
227 static uint16_t s_minUdpSourcePort;
228 static uint16_t s_maxUdpSourcePort;
229
230 RecursorControlChannel s_rcc; // only active in the handler thread
231 RecursorStats g_stats;
232 string s_programname="pdns_recursor";
// file that writePid() appends our process id to
233 string s_pidfname;
234 bool g_lowercaseOutgoing;
// timeout handed to MT->waitEvent() by the asend*/arecv* helpers
235 unsigned int g_networkTimeoutMsec;
236 unsigned int g_numThreads;
237 uint16_t g_outgoingEDNSBufsize;
238 bool g_logRPZChanges{false};
239
// Loopback, private (RFC 1918), shared address space (100.64.0.0/10), link-local
// and IPv6 unique-local ranges, as a comma-separated list of netmasks.
240 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// Same ranges as LOCAL_NETS, each entry prefixed with '!'.
241 #define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
242 // Bad Nets taken from both:
243 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
244 // and
245 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
246 // where such a network may not be considered a valid destination
247 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// LOCAL_NETS followed by BAD_NETS, joined into a single list by string-literal concatenation.
248 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
249
250 //! used to send information to a newborn mthread
251 struct DNSComboWriter {
252 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
253 {
254 }
255
256 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
257 {
258 }
259
260 void setRemote(const ComboAddress& sa)
261 {
262 d_remote=sa;
263 }
264
265 void setSource(const ComboAddress& sa)
266 {
267 d_source=sa;
268 }
269
270 void setLocal(const ComboAddress& sa)
271 {
272 d_local=sa;
273 }
274
275 void setDestination(const ComboAddress& sa)
276 {
277 d_destination=sa;
278 }
279
280 void setSocket(int sock)
281 {
282 d_socket=sock;
283 }
284
285 string getRemote() const
286 {
287 if (d_source == d_remote) {
288 return d_source.toStringWithPort();
289 }
290 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
291 }
292
293 MOADNSParser d_mdp;
294 struct timeval d_now;
295 /* Remote client, might differ from d_source
296 in case of XPF, in which case d_source holds
297 the IP of the client and d_remote of the proxy
298 */
299 ComboAddress d_remote;
300 ComboAddress d_source;
301 /* Destination address, might differ from
302 d_destination in case of XPF, in which case
303 d_destination holds the IP of the proxy and
304 d_local holds our own. */
305 ComboAddress d_local;
306 ComboAddress d_destination;
307 #ifdef HAVE_PROTOBUF
308 boost::uuids::uuid d_uuid;
309 string d_requestorId;
310 string d_deviceId;
311 #endif
312 std::string d_query;
313 std::vector<std::string> d_policyTags;
314 LuaContext::LuaObject d_data;
315 EDNSSubnetOpts d_ednssubnet;
316 shared_ptr<TCPConnection> d_tcpConnection;
317 int d_socket;
318 unsigned int d_tag{0};
319 uint32_t d_qhash{0};
320 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
321 uint16_t d_ecsBegin{0};
322 uint16_t d_ecsEnd{0};
323 bool d_variable{false};
324 bool d_ecsFound{false};
325 bool d_ecsParsed{false};
326 bool d_tcp;
327 };
328
329 MT_t* getMT()
330 {
331 return MT ? MT.get() : nullptr;
332 }
333
334 ArgvMap &arg()
335 {
336 static ArgvMap theArg;
337 return theArg;
338 }
339
340 unsigned int getRecursorThreadId()
341 {
342 return t_id;
343 }
344
345 int getMTaskerTID()
346 {
347 return MT->getTid();
348 }
349
350 static bool isDistributorThread()
351 {
352 if (t_id == 0) {
353 return false;
354 }
355
356 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
357 }
358
359 static bool isHandlerThread()
360 {
361 if (t_id == 0) {
362 return true;
363 }
364
365 return s_threadInfos.at(t_id).isHandler;
366 }
367
368 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
369
370 // -1 is error, 0 is timeout, 1 is success
371 int asendtcp(const string& data, Socket* sock)
372 {
373 PacketID pident;
374 pident.sock=sock;
375 pident.outMSG=data;
376
377 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
378 string packet;
379
380 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
381
382 if(!ret || ret==-1) { // timeout
383 t_fdm->removeWriteFD(sock->getHandle());
384 }
385 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
386 return -1;
387 }
388 return ret;
389 }
390
391 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
392
393 // -1 is error, 0 is timeout, 1 is success
394 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
395 {
396 data.clear();
397 PacketID pident;
398 pident.sock=sock;
399 pident.inNeeded=len;
400 pident.inIncompleteOkay=incompleteOkay;
401 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
402
403 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
404 if(!ret || ret==-1) { // timeout
405 t_fdm->removeReadFD(sock->getHandle());
406 }
407 else if(data.empty()) {// error, EOF or other
408 return -1;
409 }
410
411 return ret;
412 }
413
414 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
415 {
416 PacketID pident=*any_cast<PacketID>(&var);
417 char resp[512];
418 ComboAddress fromaddr;
419 socklen_t addrlen=sizeof(fromaddr);
420
421 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
422 if (fromaddr != pident.remote) {
423 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
424
425 }
426
427 t_fdm->removeReadFD(fd);
428 if(ret >= 0) {
429 string data(resp, (size_t) ret);
430 MT->sendEvent(pident, &data);
431 }
432 else {
433 string empty;
434 MT->sendEvent(pident, &empty);
435 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
436 }
437 }
438 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
439 {
440 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
441 s.setNonBlocking();
442 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
443
444 s.bind(local);
445 s.connect(dest);
446 s.send(query);
447
448 PacketID pident;
449 pident.sock=&s;
450 pident.remote=dest;
451 pident.type=0;
452 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
453
454 string data;
455
456 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
457
458 if(!ret || ret==-1) { // timeout
459 t_fdm->removeReadFD(s.getHandle());
460 }
461 else if(data.empty()) {// error, EOF or other
462 // we could special case this
463 return data;
464 }
465 return data;
466 }
467
468 //! pick a random query local address
469 ComboAddress getQueryLocalAddress(int family, uint16_t port)
470 {
471 ComboAddress ret;
472 if(family==AF_INET) {
473 if(g_localQueryAddresses4.empty())
474 ret = g_local4;
475 else
476 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
477 ret.sin4.sin_port = htons(port);
478 }
479 else {
480 if(g_localQueryAddresses6.empty())
481 ret = g_local6;
482 else
483 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
484
485 ret.sin6.sin6_port = htons(port);
486 }
487 return ret;
488 }
489
490 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
491
492 static void setSocketBuffer(int fd, int optname, uint32_t size)
493 {
494 uint32_t psize=0;
495 socklen_t len=sizeof(psize);
496
497 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
498 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
499 return;
500 }
501
502 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
503 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
504 }
505
506
507 static void setSocketReceiveBuffer(int fd, uint32_t size)
508 {
509 setSocketBuffer(fd, SO_RCVBUF, size);
510 }
511
512 static void setSocketSendBuffer(int fd, uint32_t size)
513 {
514 setSocketBuffer(fd, SO_SNDBUF, size);
515 }
516
517
518 // you can ask this class for a UDP socket to send a query from
519 // this socket is not yours, don't even think about deleting it
520 // but after you call 'returnSocket' on it, don't assume anything anymore
521 class UDPClientSocks
522 {
523 unsigned int d_numsocks;
524 public:
525 UDPClientSocks() : d_numsocks(0)
526 {
527 }
528
529 typedef set<int> socks_t;
530 socks_t d_socks;
531
532 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
533 int getSocket(const ComboAddress& toaddr, int* fd)
534 {
535 *fd=makeClientSocket(toaddr.sin4.sin_family);
536 if(*fd < 0) // temporary error - receive exception otherwise
537 return -2;
538
539 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
540 int err = errno;
541 // returnSocket(*fd);
542 try {
543 closesocket(*fd);
544 }
545 catch(const PDNSException& e) {
546 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
547 }
548
549 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
550 return -2;
551 return -1;
552 }
553
554 d_socks.insert(*fd);
555 d_numsocks++;
556 return 0;
557 }
558
559 void returnSocket(int fd)
560 {
561 socks_t::iterator i=d_socks.find(fd);
562 if(i==d_socks.end()) {
563 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
564 }
565 returnSocketLocked(i);
566 }
567
568 // return a socket to the pool, or simply erase it
569 void returnSocketLocked(socks_t::iterator& i)
570 {
571 if(i==d_socks.end()) {
572 throw PDNSException("Trying to return a socket not in the pool");
573 }
574 try {
575 t_fdm->removeReadFD(*i);
576 }
577 catch(FDMultiplexerException& e) {
578 // we sometimes return a socket that has not yet been assigned to t_fdm
579 }
580 try {
581 closesocket(*i);
582 }
583 catch(const PDNSException& e) {
584 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
585 }
586
587 d_socks.erase(i++);
588 --d_numsocks;
589 }
590
591 // returns -1 for errors which might go away, throws for ones that won't
592 static int makeClientSocket(int family)
593 {
594 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
595
596 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
597 return ret;
598
599 if(ret<0)
600 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
601
602 // setCloseOnExec(ret); // we're not going to exec
603
604 int tries=10;
605 ComboAddress sin;
606 while(--tries) {
607 uint16_t port;
608
609 if(tries==1) // fall back to kernel 'random'
610 port = 0;
611 else {
612 do {
613 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
614 }
615 while (s_avoidUdpSourcePorts.count(port));
616 }
617
618 sin=getQueryLocalAddress(family, port); // does htons for us
619
620 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
621 break;
622 }
623 if(!tries)
624 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
625
626 setNonBlocking(ret);
627 return ret;
628 }
629 };
630
631 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
632
633 /* these two functions are used by LWRes */
634 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
635 int asendto(const char *data, size_t len, int flags,
636 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
637 {
638
639 PacketID pident;
640 pident.domain = domain;
641 pident.remote = toaddr;
642 pident.type = qtype;
643
644 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
645 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
646
647 for(; chain.first != chain.second; chain.first++) {
648 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
649 /*
650 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
651 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
652 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
653 */
654 chain.first->key.chain.insert(id); // we can chain
655 *fd=-1; // gets used in waitEvent / sendEvent later on
656 return 1;
657 }
658 }
659
660 int ret=t_udpclientsocks->getSocket(toaddr, fd);
661 if(ret < 0)
662 return ret;
663
664 pident.fd=*fd;
665 pident.id=id;
666
667 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
668 ret = send(*fd, data, len, 0);
669
670 int tmp = errno;
671
672 if(ret < 0)
673 t_udpclientsocks->returnSocket(*fd);
674
675 errno = tmp; // this is for logging purposes only
676 return ret;
677 }
678
679 // -1 is error, 0 is timeout, 1 is success
680 int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
681 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
682 {
683 static optional<unsigned int> nearMissLimit;
684 if(!nearMissLimit)
685 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
686
687 PacketID pident;
688 pident.fd=fd;
689 pident.id=id;
690 pident.domain=domain;
691 pident.type = qtype;
692 pident.remote=fromaddr;
693
694 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
695
696 if(ret > 0) {
697 if(packet.empty()) // means "error"
698 return -1;
699
700 *d_len=packet.size();
701
702 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
703 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
704 g_stats.spoofCount++;
705 return -1;
706 }
707 }
708 else {
709 if(fd >= 0)
710 t_udpclientsocks->returnSocket(fd);
711 }
712 return ret;
713 }
714
715 static void writePid(void)
716 {
717 if(!::arg().mustDo("write-pid"))
718 return;
719 ofstream of(s_pidfname.c_str(), std::ios_base::app);
720 if(of)
721 of<< Utility::getpid() <<endl;
722 else
723 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
724 }
725
726 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
727 {
728 ++s_currentConnections;
729 (*t_tcpClientCounts)[d_remote]++;
730 }
731
732 TCPConnection::~TCPConnection()
733 {
734 try {
735 if(closesocket(d_fd) < 0)
736 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
737 }
738 catch(const PDNSException& e) {
739 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
740 }
741
742 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
743 t_tcpClientCounts->erase(d_remote);
744 --s_currentConnections;
745 }
746
747 AtomicCounter TCPConnection::s_currentConnections;
748
749 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
750
751 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
752 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
753 {
754 if(packetsize > 1000 && t_largeanswerremotes)
755 t_largeanswerremotes->push_back(remote);
756 switch(res) {
757 case RCode::ServFail:
758 if(t_servfailremotes) {
759 t_servfailremotes->push_back(remote);
760 if(query && t_servfailqueryring) // packet cache
761 t_servfailqueryring->push_back(make_pair(*query, qtype));
762 }
763 g_stats.servFails++;
764 break;
765 case RCode::NXDomain:
766 g_stats.nxDomains++;
767 break;
768 case RCode::NoError:
769 g_stats.noErrors++;
770 break;
771 }
772 }
773
774 static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
775 try
776 {
777 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
778 }
779 catch(...)
780 {
781 return "Exception making error message for exception";
782 }
783
784 #ifdef HAVE_PROTOBUF
785 static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
786 {
787 if (!t_protobufServers) {
788 return;
789 }
790
791 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
792 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
793 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
794 message.setServerIdentity(SyncRes::s_serverID);
795 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
796 message.setRequestorId(requestorId);
797 message.setDeviceId(deviceId);
798
799 if (!policyTags.empty()) {
800 message.setPolicyTags(policyTags);
801 }
802
803 // cerr <<message.toDebugString()<<endl;
804 std::string str;
805 message.serialize(str);
806
807 for (auto& server : *t_protobufServers) {
808 server->queueData(str);
809 }
810 }
811
812 static void protobufLogResponse(const RecProtoBufMessage& message)
813 {
814 if (!t_protobufServers) {
815 return;
816 }
817
818 // cerr <<message.toDebugString()<<endl;
819 std::string str;
820 message.serialize(str);
821
822 for (auto& server : *t_protobufServers) {
823 server->queueData(str);
824 }
825 }
826 #endif
827
828 /**
829 * Chases the CNAME provided by the PolicyCustom RPZ policy.
830 *
831 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
832 * @param qtype: The QType of the original query
833 * @param sr: A SyncRes
834 * @param res: An integer that will contain the RCODE of the lookup we do
835 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
836 */
837 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
838 {
839 if (spoofed.d_type == QType::CNAME) {
840 bool oldWantsRPZ = sr.getWantsRPZ();
841 sr.setWantsRPZ(false);
842 vector<DNSRecord> ans;
843 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
844 for (const auto& rec : ans) {
845 if(rec.d_place == DNSResourceRecord::ANSWER) {
846 ret.push_back(rec);
847 }
848 }
849 // Reset the RPZ state of the SyncRes
850 sr.setWantsRPZ(oldWantsRPZ);
851 }
852 }
853
854 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
855 {
856 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
857
858 if(rec.d_type != QType::OPT) // their TTL ain't real
859 minTTL = min(minTTL, rec.d_ttl);
860
861 rec.d_content->toPacket(pw);
862 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
863 pw.rollback();
864 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
865 pw.getHeader()->tc=1;
866 pw.truncate();
867 }
868 return false;
869 }
870
871 return true;
872 }
873
874 #ifdef HAVE_PROTOBUF
875 static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
876 {
877 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
878
879 for (const auto& server : config.servers) {
880 try {
881 result->emplace_back(new RemoteLogger(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
882 }
883 catch(const std::exception& e) {
884 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
885 }
886 catch(const PDNSException& e) {
887 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
888 }
889 }
890
891 return result;
892 }
893
894 static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
895 {
896 if (!luaconfsLocal->protobufExportConfig.enabled) {
897 if (t_protobufServers) {
898 for (auto& server : *t_protobufServers) {
899 server->stop();
900 }
901 t_protobufServers.reset();
902 }
903
904 return false;
905 }
906
907 /* if the server was not running, or if it was running according to a
908 previous configuration */
909 if (!t_protobufServers ||
910 t_protobufServersGeneration < luaconfsLocal->generation) {
911
912 if (t_protobufServers) {
913 for (auto& server : *t_protobufServers) {
914 server->stop();
915 }
916 }
917 t_protobufServers.reset();
918
919 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
920 t_protobufServersGeneration = luaconfsLocal->generation;
921 }
922
923 return true;
924 }
925
926 static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
927 {
928 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
929 if (t_outgoingProtobufServers) {
930 for (auto& server : *t_outgoingProtobufServers) {
931 server->stop();
932 }
933 }
934 t_outgoingProtobufServers.reset();
935
936 return false;
937 }
938
939 /* if the server was not running, or if it was running according to a
940 previous configuration */
941 if (!t_outgoingProtobufServers ||
942 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
943
944 if (t_outgoingProtobufServers) {
945 for (auto& server : *t_outgoingProtobufServers) {
946 server->stop();
947 }
948 }
949 t_outgoingProtobufServers.reset();
950
951 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
952 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
953 }
954
955 return true;
956 }
957 #endif /* HAVE_PROTOBUF */
958
959 #ifdef NOD_ENABLED
960 static bool nodCheckNewDomain(const DNSName& dname)
961 {
962 static const QType qt(QType::A);
963 static const uint16_t qc(QClass::IN);
964 bool ret = false;
965 // First check the (sub)domain isn't whitelisted for NOD purposes
966 if (!g_nodDomainWL.check(dname)) {
967 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
968 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
969 if (g_nodLog) {
970 // This should probably log to a dedicated log file
971 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
972 }
973 if (!(g_nodLookupDomain.isRoot())) {
974 // Send a DNS A query to <domain>.g_nodLookupDomain
975 DNSName qname = dname;
976 vector<DNSRecord> dummy;
977 qname += g_nodLookupDomain;
978 directResolve(qname, qt, qc, dummy);
979 }
980 ret = true;
981 }
982 }
983 return ret;
984 }
985
986 static void nodAddDomain(const DNSName& dname)
987 {
988 // Don't bother adding domains on the nod whitelist
989 if (!g_nodDomainWL.check(dname)) {
990 if (t_nodDBp) {
991 // This keeps the nod info up to date
992 t_nodDBp->addDomain(dname);
993 }
994 }
995 }
996
997 static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
998 {
999 bool ret = false;
1000 if (record.d_place == DNSResourceRecord::ANSWER ||
1001 record.d_place == DNSResourceRecord::ADDITIONAL) {
1002 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1003 std::stringstream ss;
1004 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1005 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
1006 if (g_udrLog) {
1007 // This should also probably log to a dedicated file.
1008 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
1009 }
1010 ret = true;
1011 }
1012 }
1013 return ret;
1014 }
1015 #endif /* NOD_ENABLED */
1016
1017 static void startDoResolve(void *p)
1018 {
1019 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
1020 try {
1021 if (t_queryring)
1022 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1023
1024 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
1025 EDNSOpts edo;
1026 std::vector<pair<uint16_t, string> > ednsOpts;
1027 bool variableAnswer = dc->d_variable;
1028 bool haveEDNS=false;
1029 #ifdef NOD_ENABLED
1030 bool hasUDR = false;
1031 #endif /* NOD_ENABLED */
1032 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1033 uint8_t ednsExtRCode = 0;
1034 if(getEDNSOpts(dc->d_mdp, &edo)) {
1035 haveEDNS=true;
1036 if (edo.d_version != 0) {
1037 ednsExtRCode = ERCode::BADVERS;
1038 }
1039
1040 if(!dc->d_tcp) {
1041 /* rfc6891 6.2.3:
1042 "Values lower than 512 MUST be treated as equal to 512."
1043 */
1044 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1045 }
1046 ednsOpts = edo.d_options;
1047 maxanswersize -= 11; // EDNS header size
1048
1049 for (const auto& o : edo.d_options) {
1050 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1051 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1052 } else if (o.first == EDNSOptionCode::NSID) {
1053 const static string mode_server_id = ::arg()["server-id"];
1054 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1055 maxanswersize > (2 + 2 + mode_server_id.size())) {
1056 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1057 variableAnswer = true; // Can't packetcache an answer with NSID
1058 // Option Code and Option Length are both 2
1059 maxanswersize -= 2 + 2 + mode_server_id.size();
1060 }
1061 }
1062 }
1063 }
1064 /* perhaps there was no EDNS or no ECS but by now we looked */
1065 dc->d_ecsParsed = true;
1066 vector<DNSRecord> ret;
1067 vector<uint8_t> packet;
1068
1069 auto luaconfsLocal = g_luaconfs.getLocal();
1070 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1071 bool wantsRPZ(true);
1072 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
1073 bool logResponse = false;
1074 #ifdef HAVE_PROTOBUF
1075 if (checkProtobufExport(luaconfsLocal)) {
1076 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
1077 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1078 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1079 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
1080 pbMessage->setServerIdentity(SyncRes::s_serverID);
1081 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1082 }
1083 #endif /* HAVE_PROTOBUF */
1084
1085 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
1086
1087 pw.getHeader()->aa=0;
1088 pw.getHeader()->ra=1;
1089 pw.getHeader()->qr=1;
1090 pw.getHeader()->tc=0;
1091 pw.getHeader()->id=dc->d_mdp.d_header.id;
1092 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
1093 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
1094
1095 /* This is the lowest TTL seen in the records of the response,
1096 so we can't cache it for longer than this value.
1097 If we have a TTL cap, this value can't be larger than the
1098 cap no matter what. */
1099 uint32_t minTTL = dc->d_ttlCap;
1100
1101 SyncRes sr(dc->d_now);
1102
1103 bool DNSSECOK=false;
1104 if(t_pdl) {
1105 sr.setLuaEngine(t_pdl);
1106 }
1107 if(g_dnssecmode != DNSSECMode::Off) {
1108 sr.setDoDNSSEC(true);
1109
1110 // Does the requestor want DNSSEC records?
1111 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
1112 DNSSECOK=true;
1113 g_stats.dnssecQueries++;
1114 }
1115 if (dc->d_mdp.d_header.cd) {
1116 /* Per rfc6840 section 5.9, "When processing a request with
1117 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1118 to return all response data, even data that has failed DNSSEC
1119 validation. */
1120 ++g_stats.dnssecCheckDisabledQueries;
1121 }
1122 if (dc->d_mdp.d_header.ad) {
1123 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1124 indicating that the requester understands and is interested in the
1125 value of the AD bit in the response. This allows a requester to
1126 indicate that it understands the AD bit without also requesting
1127 DNSSEC data via the DO bit. */
1128 ++g_stats.dnssecAuthenticDataQueries;
1129 }
1130 } else {
1131 // Ignore the client-set CD flag
1132 pw.getHeader()->cd=0;
1133 }
1134 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1135
1136 #ifdef HAVE_PROTOBUF
1137 sr.setInitialRequestId(dc->d_uuid);
1138 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
1139 #endif
1140
1141 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
1142
1143 bool tracedQuery=false; // we could consider letting Lua know about this too
1144 bool shouldNotValidate = false;
1145
1146 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1147 int res = RCode::NoError;
1148 DNSFilterEngine::Policy appliedPolicy;
1149 std::vector<DNSRecord> spoofed;
1150 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
1151 dq.ednsFlags = &edo.d_extFlags;
1152 dq.ednsOptions = &ednsOpts;
1153 dq.tag = dc->d_tag;
1154 dq.discardedPolicies = &sr.d_discardedPolicies;
1155 dq.policyTags = &dc->d_policyTags;
1156 dq.appliedPolicy = &appliedPolicy;
1157 dq.currentRecords = &ret;
1158 dq.dh = &dc->d_mdp.d_header;
1159 dq.data = dc->d_data;
1160 #ifdef HAVE_PROTOBUF
1161 dq.requestorId = dc->d_requestorId;
1162 dq.deviceId = dc->d_deviceId;
1163 #endif
1164
1165 if(ednsExtRCode != 0) {
1166 goto sendit;
1167 }
1168
1169 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
1170 pw.getHeader()->tc = 1;
1171 res = 0;
1172 variableAnswer = true;
1173 goto sendit;
1174 }
1175
1176 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
1177 sr.setLogMode(SyncRes::Store);
1178 tracedQuery=true;
1179 }
1180
1181
1182 if(!g_quiet || tracedQuery) {
1183 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
1184 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
1185 if(!dc->d_ednssubnet.source.empty()) {
1186 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
1187 }
1188 g_log<<endl;
1189 }
1190
1191 sr.setId(MT->getTid());
1192 if(!dc->d_mdp.d_header.rd)
1193 sr.setCacheOnly();
1194
1195 if (t_pdl) {
1196 t_pdl->prerpz(dq, res);
1197 }
1198
1199 // Check if the query has a policy attached to it
1200 if (wantsRPZ) {
1201 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
1202 }
1203
1204 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1205 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
1206
1207 sr.setWantsRPZ(wantsRPZ);
1208 if(wantsRPZ) {
1209 switch(appliedPolicy.d_kind) {
1210 case DNSFilterEngine::PolicyKind::NoAction:
1211 break;
1212 case DNSFilterEngine::PolicyKind::Drop:
1213 g_stats.policyDrops++;
1214 g_stats.policyResults[appliedPolicy.d_kind]++;
1215 return;
1216 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1217 g_stats.policyResults[appliedPolicy.d_kind]++;
1218 res=RCode::NXDomain;
1219 goto haveAnswer;
1220 case DNSFilterEngine::PolicyKind::NODATA:
1221 g_stats.policyResults[appliedPolicy.d_kind]++;
1222 res=RCode::NoError;
1223 goto haveAnswer;
1224 case DNSFilterEngine::PolicyKind::Custom:
1225 g_stats.policyResults[appliedPolicy.d_kind]++;
1226 res=RCode::NoError;
1227 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1228 for (const auto& dr : spoofed) {
1229 ret.push_back(dr);
1230 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1231 }
1232 goto haveAnswer;
1233 case DNSFilterEngine::PolicyKind::Truncate:
1234 if(!dc->d_tcp) {
1235 g_stats.policyResults[appliedPolicy.d_kind]++;
1236 res=RCode::NoError;
1237 pw.getHeader()->tc=1;
1238 goto haveAnswer;
1239 }
1240 break;
1241 }
1242 }
1243
1244 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
1245 try {
1246 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
1247 shouldNotValidate = sr.wasOutOfBand();
1248 }
1249 catch(ImmediateServFailException &e) {
1250 if(g_logCommonErrors)
1251 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
1252 res = RCode::ServFail;
1253 }
1254
1255 dq.validationState = sr.getValidationState();
1256
1257 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1258 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1259 appliedPolicy = sr.d_appliedPolicy;
1260 g_stats.policyResults[appliedPolicy.d_kind]++;
1261 switch(appliedPolicy.d_kind) {
1262 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1263 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1264 case DNSFilterEngine::PolicyKind::Drop:
1265 g_stats.policyDrops++;
1266 return;
1267 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1268 ret.clear();
1269 res=RCode::NXDomain;
1270 goto haveAnswer;
1271
1272 case DNSFilterEngine::PolicyKind::NODATA:
1273 ret.clear();
1274 res=RCode::NoError;
1275 goto haveAnswer;
1276
1277 case DNSFilterEngine::PolicyKind::Truncate:
1278 if(!dc->d_tcp) {
1279 ret.clear();
1280 res=RCode::NoError;
1281 pw.getHeader()->tc=1;
1282 goto haveAnswer;
1283 }
1284 break;
1285
1286 case DNSFilterEngine::PolicyKind::Custom:
1287 ret.clear();
1288 res=RCode::NoError;
1289 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1290 for (const auto& dr : spoofed) {
1291 ret.push_back(dr);
1292 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1293 }
1294 goto haveAnswer;
1295 }
1296 }
1297
1298 if (wantsRPZ) {
1299 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
1300 }
1301
1302 if(t_pdl) {
1303 if(res == RCode::NoError) {
1304 auto i=ret.cbegin();
1305 for(; i!= ret.cend(); ++i)
1306 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1307 break;
1308 if(i == ret.cend() && t_pdl->nodata(dq, res))
1309 shouldNotValidate = true;
1310
1311 }
1312 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
1313 shouldNotValidate = true;
1314
1315 if(t_pdl->postresolve(dq, res))
1316 shouldNotValidate = true;
1317 }
1318
1319 if (wantsRPZ) { //XXX This block is repeated, see above
1320 g_stats.policyResults[appliedPolicy.d_kind]++;
1321 switch(appliedPolicy.d_kind) {
1322 case DNSFilterEngine::PolicyKind::NoAction:
1323 break;
1324 case DNSFilterEngine::PolicyKind::Drop:
1325 g_stats.policyDrops++;
1326 return;
1327 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1328 ret.clear();
1329 res=RCode::NXDomain;
1330 goto haveAnswer;
1331
1332 case DNSFilterEngine::PolicyKind::NODATA:
1333 ret.clear();
1334 res=RCode::NoError;
1335 goto haveAnswer;
1336
1337 case DNSFilterEngine::PolicyKind::Truncate:
1338 if(!dc->d_tcp) {
1339 ret.clear();
1340 res=RCode::NoError;
1341 pw.getHeader()->tc=1;
1342 goto haveAnswer;
1343 }
1344 break;
1345
1346 case DNSFilterEngine::PolicyKind::Custom:
1347 ret.clear();
1348 res=RCode::NoError;
1349 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1350 for (const auto& dr : spoofed) {
1351 ret.push_back(dr);
1352 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1353 }
1354 goto haveAnswer;
1355 }
1356 }
1357 }
1358 haveAnswer:;
1359 if(res == PolicyDecision::DROP) {
1360 g_stats.policyDrops++;
1361 return;
1362 }
1363 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1364 {
1365 string trace(sr.getTrace());
1366 if(!trace.empty()) {
1367 vector<string> lines;
1368 boost::split(lines, trace, boost::is_any_of("\n"));
1369 for(const string& line : lines) {
1370 if(!line.empty())
1371 g_log<<Logger::Warning<< line << endl;
1372 }
1373 }
1374 }
1375
1376 if(res == -1) {
1377 pw.getHeader()->rcode=RCode::ServFail;
1378 // no commit here, because no record
1379 g_stats.servFails++;
1380 }
1381 else {
1382 pw.getHeader()->rcode=res;
1383
1384 // Does the validation mode or query demand validation?
1385 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1386 try {
1387 if(sr.doLog()) {
1388 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
1389 }
1390
1391 auto state = sr.getValidationState();
1392
1393 if(state == Secure) {
1394 if(sr.doLog()) {
1395 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
1396 }
1397
1398 // Is the query source interested in the value of the ad-bit?
1399 if (dc->d_mdp.d_header.ad || DNSSECOK)
1400 pw.getHeader()->ad=1;
1401 }
1402 else if(state == Insecure) {
1403 if(sr.doLog()) {
1404 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
1405 }
1406
1407 pw.getHeader()->ad=0;
1408 }
1409 else if(state == Bogus) {
1410 if(t_bogusremotes)
1411 t_bogusremotes->push_back(dc->d_source);
1412 if(t_bogusqueryring)
1413 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1414 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1415 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
1416 }
1417
1418 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1419 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1420 if(sr.doLog()) {
1421 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1422 }
1423
1424 pw.getHeader()->rcode=RCode::ServFail;
1425 goto sendit;
1426 } else {
1427 if(sr.doLog()) {
1428 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1429 }
1430 }
1431 }
1432 }
1433 catch(ImmediateServFailException &e) {
1434 if(g_logCommonErrors)
1435 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1436 pw.getHeader()->rcode=RCode::ServFail;
1437 goto sendit;
1438 }
1439 }
1440
1441 if(ret.size()) {
1442 orderAndShuffle(ret);
1443 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
1444 stable_sort(ret.begin(), ret.end(), *sl);
1445 variableAnswer=true;
1446 }
1447 }
1448
1449 bool needCommit = false;
1450 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1451 if( ! DNSSECOK &&
1452 ( i->d_type == QType::NSEC3 ||
1453 (
1454 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1455 (
1456 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1457 i->d_place != DNSResourceRecord::ANSWER
1458 )
1459 )
1460 )
1461 ) {
1462 continue;
1463 }
1464
1465 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1466 needCommit = false;
1467 break;
1468 }
1469 needCommit = true;
1470
1471 #ifdef NOD_ENABLED
1472 bool udr = false;
1473 if (g_udrEnabled) {
1474 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
1475 if (!hasUDR && udr)
1476 hasUDR = true;
1477 }
1478 #endif /* NOD ENABLED */
1479
1480 #ifdef HAVE_PROTOBUF
1481 if (t_protobufServers) {
1482 #ifdef NOD_ENABLED
1483 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1484 #else
1485 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
1486 #endif /* NOD_ENABLED */
1487 }
1488 #endif
1489 }
1490 if(needCommit)
1491 pw.commit();
1492 }
1493 sendit:;
1494
1495 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
1496 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
1497 EDNSSubnetOpts eo;
1498 eo.source = dc->d_ednssubnet.source;
1499 ComboAddress sa;
1500 sa.reset();
1501 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1502 eo.scope = Netmask(sa, 0);
1503
1504 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1505 }
1506
1507 if (haveEDNS) {
1508 /* we try to add the EDNS OPT RR even for truncated answers,
1509 as rfc6891 states:
1510 "The minimal response MUST be the DNS header, question section, and an
1511 OPT record. This MUST also occur when a truncated response (using
1512 the DNS header's TC bit) is returned."
1513 */
1514 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1515 pw.commit();
1516 }
1517
1518 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1519 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1520 #ifdef NOD_ENABLED
1521 bool nod = false;
1522 if (g_nodEnabled) {
1523 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1524 nod = true;
1525 }
1526 #endif /* NOD_ENABLED */
1527 #ifdef HAVE_PROTOBUF
1528 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
1529 pbMessage->setBytes(packet.size());
1530 pbMessage->setResponseCode(pw.getHeader()->rcode);
1531 if (appliedPolicy.d_name) {
1532 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1533 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
1534 }
1535 pbMessage->setPolicyTags(dc->d_policyTags);
1536 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1537 pbMessage->setRequestorId(dq.requestorId);
1538 pbMessage->setDeviceId(dq.deviceId);
1539 #ifdef NOD_ENABLED
1540 if (g_nodEnabled) {
1541 if (nod) {
1542 pbMessage->setNOD(true);
1543 pbMessage->addPolicyTag(g_nod_pbtag);
1544 }
1545 if (hasUDR) {
1546 pbMessage->addPolicyTag(g_udr_pbtag);
1547 }
1548 }
1549 #endif /* NOD_ENABLED */
1550 protobufLogResponse(*pbMessage);
1551 #ifdef NOD_ENABLED
1552 if (g_nodEnabled) {
1553 pbMessage->setNOD(false);
1554 pbMessage->clearUDR();
1555 if (nod)
1556 pbMessage->removePolicyTag(g_nod_pbtag);
1557 if (hasUDR)
1558 pbMessage->removePolicyTag(g_udr_pbtag);
1559 }
1560 #endif /* NOD_ENABLED */
1561 }
1562 #endif
1563 if(!dc->d_tcp) {
1564 struct msghdr msgh;
1565 struct iovec iov;
1566 char cbuf[256];
1567 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1568 msgh.msg_control=NULL;
1569
1570 if(g_fromtosockets.count(dc->d_socket)) {
1571 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1572 }
1573 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1574 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
1575
1576 if(variableAnswer || sr.wasVariable()) {
1577 g_stats.variableResponses++;
1578 }
1579 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1580 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1581 string((const char*)&*packet.begin(), packet.size()),
1582 g_now.tv_sec,
1583 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1584 min(minTTL,SyncRes::s_packetcachettl),
1585 dq.validationState,
1586 dc->d_ecsBegin,
1587 dc->d_ecsEnd,
1588 std::move(pbMessage));
1589 }
1590 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1591 }
1592 else {
1593 char buf[2];
1594 buf[0]=packet.size()/256;
1595 buf[1]=packet.size()%256;
1596
1597 Utility::iovec iov[2];
1598
1599 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1600 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1601
1602 int wret=Utility::writev(dc->d_socket, iov, 2);
1603 bool hadError=true;
1604
1605 if(wret == 0)
1606 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1607 else if(wret < 0 )
1608 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1609 else if((unsigned int)wret != 2 + packet.size())
1610 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1611 else
1612 hadError=false;
1613
1614 // update tcp connection status, either by closing or moving to 'BYTE0'
1615
1616 if(hadError) {
1617 // no need to remove us from FDM, we weren't there
1618 dc->d_socket = -1;
1619 }
1620 else {
1621 dc->d_tcpConnection->queriesCount++;
1622 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1623 dc->d_socket = -1;
1624 }
1625 else {
1626 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1627 Utility::gettimeofday(&g_now, 0); // needs to be updated
1628 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1629 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1630 }
1631 }
1632 }
1633 float spent=makeFloat(sr.getNow()-dc->d_now);
1634 if(!g_quiet) {
1635 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1636 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1637 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1638 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1639
1640 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1641 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
1642 }
1643
1644 g_log<<endl;
1645
1646 }
1647
1648 if (sr.d_outqueries || sr.d_authzonequeries) {
1649 t_RC->cacheMisses++;
1650 }
1651 else {
1652 t_RC->cacheHits++;
1653 }
1654
1655 if(spent < 0.001)
1656 g_stats.answers0_1++;
1657 else if(spent < 0.010)
1658 g_stats.answers1_10++;
1659 else if(spent < 0.1)
1660 g_stats.answers10_100++;
1661 else if(spent < 1.0)
1662 g_stats.answers100_1000++;
1663 else
1664 g_stats.answersSlow++;
1665
1666 uint64_t newLat=(uint64_t)(spent*1000000);
1667 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1668 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1669 // no worries, we do this for packet cache hits elsewhere
1670
1671 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1672 if(ourtime < 1)
1673 g_stats.ourtime0_1++;
1674 else if(ourtime < 2)
1675 g_stats.ourtime1_2++;
1676 else if(ourtime < 4)
1677 g_stats.ourtime2_4++;
1678 else if(ourtime < 8)
1679 g_stats.ourtime4_8++;
1680 else if(ourtime < 16)
1681 g_stats.ourtime8_16++;
1682 else if(ourtime < 32)
1683 g_stats.ourtime16_32++;
1684 else {
1685 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1686 g_stats.ourtimeSlow++;
1687 }
1688 if(ourtime >= 0.0) {
1689 newLat=ourtime*1000; // usec
1690 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1691 }
1692 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1693 }
1694 catch(PDNSException &ae) {
1695 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1696 }
1697 catch(const MOADNSException &mde) {
1698 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
1699 }
1700 catch(std::exception& e) {
1701 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1702
1703 // Luawrapper nests the exception from Lua, so we unnest it here
1704 try {
1705 std::rethrow_if_nested(e);
1706 } catch(const std::exception& ne) {
1707 g_log<<". Extra info: "<<ne.what();
1708 } catch(...) {}
1709
1710 g_log<<endl;
1711 }
1712 catch(...) {
1713 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1714 }
1715
1716 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1717 }
1718
1719 static void makeControlChannelSocket(int processNum=-1)
1720 {
1721 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1722 if(processNum >= 0)
1723 sockname += "."+std::to_string(processNum);
1724 sockname+=".controlsocket";
1725 s_rcc.listen(sockname);
1726
1727 int sockowner = -1;
1728 int sockgroup = -1;
1729
1730 if (!::arg().isEmpty("socket-group"))
1731 sockgroup=::arg().asGid("socket-group");
1732 if (!::arg().isEmpty("socket-owner"))
1733 sockowner=::arg().asUid("socket-owner");
1734
1735 if (sockgroup > -1 || sockowner > -1) {
1736 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1737 unixDie("Failed to chown control socket");
1738 }
1739 }
1740
1741 // do mode change if socket-mode is given
1742 if(!::arg().isEmpty("socket-mode")) {
1743 mode_t sockmode=::arg().asMode("socket-mode");
1744 if(chmod(sockname.c_str(), sockmode) < 0) {
1745 unixDie("Failed to chmod control socket");
1746 }
1747 }
1748 }
1749
1750 static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1751 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
1752 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
1753 {
1754 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
1755 const bool lookForECS = ednssubnet != nullptr;
1756 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
1757 size_t questionLen = question.length();
1758 unsigned int consumed=0;
1759 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1760
1761 size_t pos= sizeof(dnsheader)+consumed+4;
1762 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1763 const uint16_t arcount = ntohs(dh->arcount);
1764
1765 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1766 if (question.at(pos) != 0) {
1767 /* not an OPT or a XPF, bye. */
1768 return;
1769 }
1770
1771 pos += 1;
1772 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1773 pos += sizeof(dnsrecordheader);
1774
1775 if (pos >= questionLen) {
1776 return;
1777 }
1778
1779 /* OPT root label (1) followed by type (2) */
1780 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
1781 if (!options) {
1782 char* ecsStart = nullptr;
1783 size_t ecsLen = 0;
1784 /* we need to pass the record len */
1785 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1786 if (res == 0 && ecsLen > 4) {
1787 EDNSSubnetOpts eso;
1788 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1789 *ednssubnet=eso;
1790 foundECS = true;
1791 }
1792 }
1793 }
1794 else {
1795 /* we need to pass the record len */
1796 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
1797 if (res == 0) {
1798 const auto& it = options->find(EDNSOptionCode::ECS);
1799 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
1800 EDNSSubnetOpts eso;
1801 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
1802 *ednssubnet=eso;
1803 foundECS = true;
1804 }
1805 }
1806 }
1807 }
1808 }
1809 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
1810 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1811 return;
1812 }
1813
1814 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1815 }
1816
1817 pos += ntohs(drh->d_clen);
1818 }
1819 }
1820
1821 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1822 {
1823 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1824
1825 if(conn->state==TCPConnection::BYTE0) {
1826 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
1827 if(bytes==1)
1828 conn->state=TCPConnection::BYTE1;
1829 if(bytes==2) {
1830 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1831 conn->data.resize(conn->qlen);
1832 conn->bytesread=0;
1833 conn->state=TCPConnection::GETQUESTION;
1834 }
1835 if(!bytes || bytes < 0) {
1836 t_fdm->removeReadFD(fd);
1837 return;
1838 }
1839 }
1840 else if(conn->state==TCPConnection::BYTE1) {
1841 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
1842 if(bytes==1) {
1843 conn->state=TCPConnection::GETQUESTION;
1844 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1845 conn->data.resize(conn->qlen);
1846 conn->bytesread=0;
1847 }
1848 if(!bytes || bytes < 0) {
1849 if(g_logCommonErrors)
1850 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
1851 t_fdm->removeReadFD(fd);
1852 return;
1853 }
1854 }
1855 else if(conn->state==TCPConnection::GETQUESTION) {
1856 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
1857 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1858 if(g_logCommonErrors) {
1859 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1860 }
1861 t_fdm->removeReadFD(fd);
1862 return;
1863 }
1864 conn->bytesread+=(uint16_t)bytes;
1865 if(conn->bytesread==conn->qlen) {
1866 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1867
1868 std::unique_ptr<DNSComboWriter> dc;
1869 try {
1870 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
1871 }
1872 catch(const MOADNSException &mde) {
1873 g_stats.clientParseError++;
1874 if(g_logCommonErrors)
1875 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
1876 return;
1877 }
1878 dc->d_tcpConnection = conn; // carry the torch
1879 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1880 dc->d_tcp=true;
1881 dc->setRemote(conn->d_remote);
1882 dc->setSource(conn->d_remote);
1883 ComboAddress dest;
1884 dest.reset();
1885 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1886 socklen_t len = dest.getSocklen();
1887 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1888 dc->setLocal(dest);
1889 dc->setDestination(dest);
1890 DNSName qname;
1891 uint16_t qtype=0;
1892 uint16_t qclass=0;
1893 bool needECS = false;
1894 bool needXPF = g_XPFAcl.match(conn->d_remote);
1895 string requestorId;
1896 string deviceId;
1897 bool logQuery = false;
1898 #ifdef HAVE_PROTOBUF
1899 auto luaconfsLocal = g_luaconfs.getLocal();
1900 if (checkProtobufExport(luaconfsLocal)) {
1901 needECS = true;
1902 }
1903 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
1904 #endif
1905
1906 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
1907
1908 try {
1909 EDNSOptionViewMap ednsOptions;
1910 bool xpfFound = false;
1911 dc->d_ecsParsed = true;
1912 dc->d_ecsFound = false;
1913 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
1914 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1915 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
1916
1917 if(t_pdl) {
1918 try {
1919 if (t_pdl->d_gettag_ffi) {
1920 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
1921 }
1922 else if (t_pdl->d_gettag) {
1923 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1924 }
1925 }
1926 catch(const std::exception& e) {
1927 if(g_logCommonErrors)
1928 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1929 }
1930 }
1931 }
1932 catch(const std::exception& e)
1933 {
1934 if(g_logCommonErrors)
1935 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1936 }
1937 }
1938
1939 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1940
1941 #ifdef HAVE_PROTOBUF
1942 if(t_protobufServers || t_outgoingProtobufServers) {
1943 dc->d_requestorId = requestorId;
1944 dc->d_deviceId = deviceId;
1945 dc->d_uuid = getUniqueID();
1946 }
1947
1948 if(t_protobufServers) {
1949 try {
1950
1951 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
1952 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1953 }
1954 }
1955 catch(std::exception& e) {
1956 if(g_logCommonErrors)
1957 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1958 }
1959 }
1960 #endif
1961 if(t_pdl) {
1962 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
1963 if(!g_quiet)
1964 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
1965 g_stats.policyDrops++;
1966 return;
1967 }
1968 }
1969
1970 if(dc->d_mdp.d_header.qr) {
1971 g_stats.ignoredCount++;
1972 if(g_logCommonErrors) {
1973 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1974 }
1975 return;
1976 }
1977 if(dc->d_mdp.d_header.opcode) {
1978 g_stats.ignoredCount++;
1979 if(g_logCommonErrors) {
1980 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1981 }
1982 return;
1983 }
1984 else if (dh->qdcount == 0) {
1985 g_stats.emptyQueriesCount++;
1986 if(g_logCommonErrors) {
1987 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
1988 }
1989 return;
1990 }
1991 else {
1992 ++g_stats.qcounter;
1993 ++g_stats.tcpqcounter;
1994 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
1995 return;
1996 }
1997 }
1998 }
1999 }
2000
2001 //! Handle new incoming TCP connection
2002 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
2003 {
2004 ComboAddress addr;
2005 socklen_t addrlen=sizeof(addr);
2006 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
2007 if(newsock>=0) {
2008 if(MT->numProcesses() > g_maxMThreads) {
2009 g_stats.overCapacityDrops++;
2010 try {
2011 closesocket(newsock);
2012 }
2013 catch(const PDNSException& e) {
2014 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
2015 }
2016 return;
2017 }
2018
2019 if(t_remotes)
2020 t_remotes->push_back(addr);
2021 if(t_allowFrom && !t_allowFrom->match(&addr)) {
2022 if(!g_quiet)
2023 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2024
2025 g_stats.unauthorizedTCP++;
2026 try {
2027 closesocket(newsock);
2028 }
2029 catch(const PDNSException& e) {
2030 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
2031 }
2032 return;
2033 }
2034 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
2035 g_stats.tcpClientOverflow++;
2036 try {
2037 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2038 }
2039 catch(const PDNSException& e) {
2040 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
2041 }
2042 return;
2043 }
2044
2045 setNonBlocking(newsock);
2046 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
2047 tc->state=TCPConnection::BYTE0;
2048
2049 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
2050
2051 struct timeval now;
2052 Utility::gettimeofday(&now, 0);
2053 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
2054 }
2055 }
2056
2057 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
2058 {
2059 gettimeofday(&g_now, 0);
2060 struct timeval diff = g_now - tv;
2061 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
2062
2063 if(tv.tv_sec && delta > 1000.0) {
2064 g_stats.tooOldDrops++;
2065 return 0;
2066 }
2067
2068 ++g_stats.qcounter;
2069 if(fromaddr.sin4.sin_family==AF_INET6)
2070 g_stats.ipv6qcounter++;
2071
2072 string response;
2073 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
2074 unsigned int ctag=0;
2075 uint32_t qhash = 0;
2076 bool needECS = false;
2077 bool needXPF = g_XPFAcl.match(fromaddr);
2078 std::vector<std::string> policyTags;
2079 LuaContext::LuaObject data;
2080 ComboAddress source = fromaddr;
2081 ComboAddress destination = destaddr;
2082 string requestorId;
2083 string deviceId;
2084 bool logQuery = false;
2085 #ifdef HAVE_PROTOBUF
2086 boost::uuids::uuid uniqueId;
2087 auto luaconfsLocal = g_luaconfs.getLocal();
2088 if (checkProtobufExport(luaconfsLocal)) {
2089 uniqueId = getUniqueID();
2090 needECS = true;
2091 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
2092 uniqueId = getUniqueID();
2093 }
2094 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2095 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2096 #endif
2097 EDNSSubnetOpts ednssubnet;
2098 bool ecsFound = false;
2099 bool ecsParsed = false;
2100 uint16_t ecsBegin = 0;
2101 uint16_t ecsEnd = 0;
2102 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2103 bool variable = false;
2104 try {
2105 DNSName qname;
2106 uint16_t qtype=0;
2107 uint16_t qclass=0;
2108 uint32_t age;
2109 bool qnameParsed=false;
2110 #ifdef MALLOC_TRACE
2111 /*
2112 static uint64_t last=0;
2113 if(!last)
2114 g_mtracer->clearAllocators();
2115 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2116 last=g_mtracer->getAllocs();
2117 cout<<g_mtracer->topAllocatorsString()<<endl;
2118 g_mtracer->clearAllocators();
2119 */
2120 #endif
2121
2122 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
2123 try {
2124 EDNSOptionViewMap ednsOptions;
2125 bool xpfFound = false;
2126
2127 ecsFound = false;
2128
2129 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2130 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2131 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2132
2133 qnameParsed = true;
2134 ecsParsed = true;
2135
2136 if(t_pdl) {
2137 try {
2138 if (t_pdl->d_gettag_ffi) {
2139 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
2140 }
2141 else if (t_pdl->d_gettag) {
2142 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2143 }
2144 }
2145 catch(const std::exception& e) {
2146 if(g_logCommonErrors)
2147 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2148 }
2149 }
2150 }
2151 catch(const std::exception& e)
2152 {
2153 if(g_logCommonErrors)
2154 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2155 }
2156 }
2157
2158 bool cacheHit = false;
2159 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
2160 #ifdef HAVE_PROTOBUF
2161 if (t_protobufServers) {
2162 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
2163 pbMessage->setServerIdentity(SyncRes::s_serverID);
2164 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
2165 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
2166 }
2167 }
2168 #endif /* HAVE_PROTOBUF */
2169
2170 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2171 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2172 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2173 vState valState;
2174 if (qnameParsed) {
2175 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2176 }
2177 else {
2178 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2179 }
2180
2181 if (cacheHit) {
2182 if(valState == Bogus) {
2183 if(t_bogusremotes)
2184 t_bogusremotes->push_back(source);
2185 if(t_bogusqueryring)
2186 t_bogusqueryring->push_back(make_pair(qname, qtype));
2187 }
2188
2189 #ifdef HAVE_PROTOBUF
2190 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
2191 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2192 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
2193 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2194 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2195 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2196 pbMessage->setRequestorId(requestorId);
2197 pbMessage->setDeviceId(deviceId);
2198 protobufLogResponse(*pbMessage);
2199 }
2200 #endif /* HAVE_PROTOBUF */
2201 if(!g_quiet)
2202 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
2203
2204 g_stats.packetCacheHits++;
2205 SyncRes::s_queries++;
2206 ageDNSPacket(response, age);
2207 struct msghdr msgh;
2208 struct iovec iov;
2209 char cbuf[256];
2210 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2211 msgh.msg_control=NULL;
2212
2213 if(g_fromtosockets.count(fd)) {
2214 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
2215 }
2216 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
2217 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
2218
2219 if(response.length() >= sizeof(struct dnsheader)) {
2220 struct dnsheader tmpdh;
2221 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
2222 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
2223 }
2224 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
2225 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
2226 return 0;
2227 }
2228 }
2229 catch(std::exception& e) {
2230 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
2231 return 0;
2232 }
2233
2234 if(t_pdl) {
2235 if(t_pdl->ipfilter(source, destination, *dh)) {
2236 if(!g_quiet)
2237 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2238 g_stats.policyDrops++;
2239 return 0;
2240 }
2241 }
2242
2243 if(MT->numProcesses() > g_maxMThreads) {
2244 if(!g_quiet)
2245 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
2246
2247 g_stats.overCapacityDrops++;
2248 return 0;
2249 }
2250
2251 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
2252 dc->setSocket(fd);
2253 dc->d_tag=ctag;
2254 dc->d_qhash=qhash;
2255 dc->setRemote(fromaddr);
2256 dc->setSource(source);
2257 dc->setLocal(destaddr);
2258 dc->setDestination(destination);
2259 dc->d_tcp=false;
2260 dc->d_ecsFound = ecsFound;
2261 dc->d_ecsParsed = ecsParsed;
2262 dc->d_ecsBegin = ecsBegin;
2263 dc->d_ecsEnd = ecsEnd;
2264 dc->d_ednssubnet = ednssubnet;
2265 dc->d_ttlCap = ttlCap;
2266 dc->d_variable = variable;
2267 #ifdef HAVE_PROTOBUF
2268 if (t_protobufServers || t_outgoingProtobufServers) {
2269 dc->d_uuid = std::move(uniqueId);
2270 }
2271 dc->d_requestorId = requestorId;
2272 dc->d_deviceId = deviceId;
2273 #endif
2274
2275 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
2276 return 0;
2277 }
2278
2279
2280 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
2281 {
2282 ssize_t len;
2283 static const size_t maxIncomingQuerySize = 512;
2284 static thread_local std::string data;
2285 ComboAddress fromaddr;
2286 struct msghdr msgh;
2287 struct iovec iov;
2288 char cbuf[256];
2289 bool firstQuery = true;
2290
2291 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2292 data.resize(maxIncomingQuerySize);
2293 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2294 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
2295
2296 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
2297
2298 firstQuery = false;
2299
2300 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2301 g_stats.ignoredCount++;
2302 if (!g_quiet) {
2303 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2304 }
2305 return;
2306 }
2307
2308 if (msgh.msg_flags & MSG_TRUNC) {
2309 g_stats.truncatedDrops++;
2310 if (!g_quiet) {
2311 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2312 }
2313 return;
2314 }
2315
2316 if(t_remotes) {
2317 t_remotes->push_back(fromaddr);
2318 }
2319
2320 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2321 if(!g_quiet) {
2322 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2323 }
2324
2325 g_stats.unauthorizedUDP++;
2326 return;
2327 }
2328 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2329 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2330 if(!g_quiet) {
2331 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2332 }
2333
2334 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2335 return;
2336 }
2337
2338 try {
2339 data.resize(static_cast<size_t>(len));
2340 dnsheader* dh=(dnsheader*)&data[0];
2341
2342 if(dh->qr) {
2343 g_stats.ignoredCount++;
2344 if(g_logCommonErrors) {
2345 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2346 }
2347 }
2348 else if(dh->opcode) {
2349 g_stats.ignoredCount++;
2350 if(g_logCommonErrors) {
2351 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2352 }
2353 }
2354 else if (dh->qdcount == 0) {
2355 g_stats.emptyQueriesCount++;
2356 if(g_logCommonErrors) {
2357 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2358 }
2359 }
2360 else {
2361 struct timeval tv={0,0};
2362 HarvestTimestamp(&msgh, &tv);
2363 ComboAddress dest;
2364 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2365 auto loc = rplookup(g_listenSocketsAddresses, fd);
2366 if(HarvestDestinationAddress(&msgh, &dest)) {
2367 // but.. need to get port too
2368 if(loc) {
2369 dest.sin4.sin_port = loc->sin4.sin_port;
2370 }
2371 }
2372 else {
2373 if(loc) {
2374 dest = *loc;
2375 }
2376 else {
2377 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2378 socklen_t slen = dest.getSocklen();
2379 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2380 }
2381 }
2382
2383 if(g_weDistributeQueries) {
2384 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2385 }
2386 else {
2387 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
2388 }
2389 }
2390 }
2391 catch(const MOADNSException &mde) {
2392 g_stats.clientParseError++;
2393 if(g_logCommonErrors) {
2394 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2395 }
2396 }
2397 catch(const std::runtime_error& e) {
2398 g_stats.clientParseError++;
2399 if(g_logCommonErrors) {
2400 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2401 }
2402 }
2403 }
2404 else {
2405 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2406 if(firstQuery && errno == EAGAIN) {
2407 g_stats.noPacketError++;
2408 }
2409
2410 break;
2411 }
2412 }
2413 }
2414
2415 static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
2416 {
2417 int fd;
2418 vector<string>locals;
2419 stringtok(locals,::arg()["local-address"]," ,");
2420
2421 if(locals.empty())
2422 throw PDNSException("No local address specified");
2423
2424 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2425 ServiceTuple st;
2426 st.port=::arg().asNum("local-port");
2427 parseService(*i, st);
2428
2429 ComboAddress sin;
2430
2431 sin.reset();
2432 sin.sin4.sin_family = AF_INET;
2433 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2434 sin.sin6.sin6_family = AF_INET6;
2435 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2436 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
2437 }
2438
2439 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
2440 if(fd<0)
2441 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2442
2443 setCloseOnExec(fd);
2444
2445 int tmp=1;
2446 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
2447 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
2448 exit(1);
2449 }
2450 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
2451 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2452 }
2453
2454 #ifdef TCP_DEFER_ACCEPT
2455 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
2456 if(i==locals.begin())
2457 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
2458 }
2459 #endif
2460
2461 if( ::arg().mustDo("non-local-bind") )
2462 Utility::setBindAny(AF_INET, fd);
2463
2464 #ifdef SO_REUSEPORT
2465 if(g_reusePort) {
2466 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2467 throw PDNSException("SO_REUSEPORT: "+stringerror());
2468 }
2469 #endif
2470
2471 if (::arg().asNum("tcp-fast-open") > 0) {
2472 #ifdef TCP_FASTOPEN
2473 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2474 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
2475 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
2476 }
2477 #else
2478 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
2479 #endif
2480 }
2481
2482 sin.sin4.sin_port = htons(st.port);
2483 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
2484 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
2485 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
2486
2487 setNonBlocking(fd);
2488 setSocketSendBuffer(fd, 65000);
2489 listen(fd, 128);
2490 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
2491 tcpSockets.insert(fd);
2492
2493 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2494 // - fd is not that which we know here, but returned from accept()
2495 if(sin.sin4.sin_family == AF_INET)
2496 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
2497 else
2498 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2499 }
2500 }
2501
2502 static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
2503 {
2504 int one=1;
2505 vector<string>locals;
2506 stringtok(locals,::arg()["local-address"]," ,");
2507
2508 if(locals.empty())
2509 throw PDNSException("No local address specified");
2510
2511 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2512 ServiceTuple st;
2513 st.port=::arg().asNum("local-port");
2514 parseService(*i, st);
2515
2516 ComboAddress sin;
2517
2518 sin.reset();
2519 sin.sin4.sin_family = AF_INET;
2520 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2521 sin.sin6.sin6_family = AF_INET6;
2522 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2523 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2524 }
2525
2526 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2527 if(fd < 0) {
2528 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2529 }
2530 if (!setSocketTimestamps(fd))
2531 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2532
2533 if(IsAnyAddress(sin)) {
2534 if(sin.sin4.sin_family == AF_INET)
2535 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2536 g_fromtosockets.insert(fd);
2537 #ifdef IPV6_RECVPKTINFO
2538 if(sin.sin4.sin_family == AF_INET6)
2539 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2540 g_fromtosockets.insert(fd);
2541 #endif
2542 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2543 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2544 }
2545 }
2546 if( ::arg().mustDo("non-local-bind") )
2547 Utility::setBindAny(AF_INET6, fd);
2548
2549 setCloseOnExec(fd);
2550
2551 setSocketReceiveBuffer(fd, 250000);
2552 sin.sin4.sin_port = htons(st.port);
2553
2554
2555 #ifdef SO_REUSEPORT
2556 if(g_reusePort) {
2557 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2558 throw PDNSException("SO_REUSEPORT: "+stringerror());
2559 }
2560 #endif
2561 socklen_t socklen=sin.getSocklen();
2562 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2563 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2564
2565 setNonBlocking(fd);
2566
2567 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
2568 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2569 if(sin.sin4.sin_family == AF_INET)
2570 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2571 else
2572 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2573 }
2574 }
2575
2576 static void daemonize(void)
2577 {
2578 if(fork())
2579 exit(0); // bye bye
2580
2581 setsid();
2582
2583 int i=open("/dev/null",O_RDWR); /* open stdin */
2584 if(i < 0)
2585 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2586 else {
2587 dup2(i,0); /* stdin */
2588 dup2(i,1); /* stderr */
2589 dup2(i,2); /* stderr */
2590 close(i);
2591 }
2592 }
2593
2594 static void usr1Handler(int)
2595 {
2596 statsWanted=true;
2597 }
2598
2599 static void usr2Handler(int)
2600 {
2601 g_quiet= !g_quiet;
2602 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2603 ::arg().set("quiet")=g_quiet ? "" : "no";
2604 }
2605
2606 static void doStats(void)
2607 {
2608 static time_t lastOutputTime;
2609 static uint64_t lastQueryCount;
2610
2611 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2612 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2613
2614 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2615 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2616 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2617 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2618 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2619
2620 g_log<<Logger::Notice<<"stats: throttle map: "
2621 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2622 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2623 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2624 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2625 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2626 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2627 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2628
2629 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2630 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2631
2632 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2633 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2634
2635 time_t now = time(0);
2636 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2637 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2638 }
2639 lastOutputTime = now;
2640 lastQueryCount = SyncRes::s_queries;
2641 }
2642 else if(statsWanted)
2643 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
2644
2645 statsWanted=false;
2646 }
2647
2648 static void houseKeeping(void *)
2649 {
2650 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
2651 static thread_local int cleanCounter=0;
2652 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2653 auto luaconfsLocal = g_luaconfs.getLocal();
2654
2655 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2656 // Loading the Lua config file already "refreshed" the TAs
2657 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2658 }
2659
2660 try {
2661 if(s_running)
2662 return;
2663 s_running=true;
2664
2665 struct timeval now;
2666 Utility::gettimeofday(&now, 0);
2667
2668 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2669 DTime dt;
2670 dt.setTimeval(now);
2671 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2672 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2673
2674 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2675
2676 if(!((cleanCounter++)%40)) { // this is a full scan!
2677 time_t limit=now.tv_sec-300;
2678 SyncRes::pruneNSSpeeds(limit);
2679 }
2680 last_prune=time(0);
2681 }
2682
2683 if(now.tv_sec - last_rootupdate > 7200) {
2684 int res = SyncRes::getRootNS(g_now, nullptr);
2685 if (!res)
2686 last_rootupdate=now.tv_sec;
2687 }
2688
2689 if(isHandlerThread()) {
2690
2691 if(now.tv_sec - last_secpoll >= 3600) {
2692 try {
2693 doSecPoll(&last_secpoll);
2694 }
2695 catch(std::exception& e)
2696 {
2697 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2698 }
2699 catch(PDNSException& e)
2700 {
2701 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2702 }
2703 catch(ImmediateServFailException &e)
2704 {
2705 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2706 }
2707 catch(...)
2708 {
2709 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
2710 }
2711 }
2712
2713 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2714 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2715 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2716 try {
2717 map<DNSName, dsmap_t> dsAnchors;
2718 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2719 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2720 lci.dsAnchors = dsAnchors;
2721 });
2722 }
2723 last_trustAnchorUpdate = now.tv_sec;
2724 } catch (const PDNSException &pe) {
2725 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2726 }
2727 }
2728 s_running=false;
2729 }
2730 }
2731 catch(PDNSException& ae)
2732 {
2733 s_running=false;
2734 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2735 throw;
2736 }
2737 }
2738
2739 static void makeThreadPipes()
2740 {
2741 /* thread 0 is the handler / SNMP, we start at 1 */
2742 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2743 auto& threadInfos = s_threadInfos.at(n);
2744
2745 int fd[2];
2746 if(pipe(fd) < 0)
2747 unixDie("Creating pipe for inter-thread communications");
2748
2749 threadInfos.pipes.readToThread = fd[0];
2750 threadInfos.pipes.writeToThread = fd[1];
2751
2752 if(pipe(fd) < 0)
2753 unixDie("Creating pipe for inter-thread communications");
2754
2755 threadInfos.pipes.readFromThread = fd[0];
2756 threadInfos.pipes.writeFromThread = fd[1];
2757
2758 if(pipe(fd) < 0)
2759 unixDie("Creating pipe for inter-thread communications");
2760
2761 threadInfos.pipes.readQueriesToThread = fd[0];
2762 threadInfos.pipes.writeQueriesToThread = fd[1];
2763
2764 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
2765 unixDie("Making pipe for inter-thread communications non-blocking");
2766 }
2767 }
2768 }
2769
2770 struct ThreadMSG
2771 {
2772 pipefunc_t func;
2773 bool wantAnswer;
2774 };
2775
2776 void broadcastFunction(const pipefunc_t& func)
2777 {
2778 /* This function might be called by the worker with t_id 0 during startup
2779 for the initialization of ACLs and domain maps. After that it should only
2780 be called by the handler. */
2781
2782 if (s_threadInfos.empty() && isHandlerThread()) {
2783 /* the handler and distributors will call themselves below, but
2784 during startup we get called while s_threadInfos has not been
2785 populated yet to update the ACL or domain maps, so we need to
2786 handle that case.
2787 */
2788 func();
2789 }
2790
2791 unsigned int n = 0;
2792 for (const auto& threadInfo : s_threadInfos) {
2793 if(n++ == t_id) {
2794 func(); // don't write to ourselves!
2795 continue;
2796 }
2797
2798 ThreadMSG* tmsg = new ThreadMSG();
2799 tmsg->func = func;
2800 tmsg->wantAnswer = true;
2801 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2802 delete tmsg;
2803
2804 unixDie("write to thread pipe returned wrong size or error");
2805 }
2806
2807 string* resp = nullptr;
2808 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2809 unixDie("read from thread pipe returned wrong size or error");
2810
2811 if(resp) {
2812 delete resp;
2813 resp = nullptr;
2814 }
2815 }
2816 }
2817
2818 static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
2819 {
2820 const auto& targetInfo = s_threadInfos[target];
2821 if(!targetInfo.isWorker) {
2822 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
2823 exit(1);
2824 }
2825
2826 const auto& tps = targetInfo.pipes;
2827
2828 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2829 if (written > 0) {
2830 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2831 delete tmsg;
2832 unixDie("write to thread pipe returned wrong size or error");
2833 }
2834 }
2835 else {
2836 int error = errno;
2837 if (error == EAGAIN || error == EWOULDBLOCK) {
2838 return false;
2839 } else {
2840 delete tmsg;
2841 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
2842 }
2843 }
2844
2845 return true;
2846 }
2847
2848 // This function is only called by the distributor threads, when pdns-distributes-queries is set
2849 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2850 {
2851 if (!isDistributorThread()) {
2852 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2853 exit(1);
2854 }
2855
2856 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2857 unsigned int target = /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
2858
2859 ThreadMSG* tmsg = new ThreadMSG();
2860 tmsg->func = func;
2861 tmsg->wantAnswer = false;
2862
2863 if (!trySendingQueryToWorker(target, tmsg)) {
2864 /* if this function failed but did not raise an exception, it means that the pipe
2865 was full, let's try another one */
2866 unsigned int newTarget = 0;
2867 do {
2868 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
2869 } while (newTarget == target);
2870
2871 if (!trySendingQueryToWorker(newTarget, tmsg)) {
2872 g_stats.queryPipeFullDrops++;
2873 delete tmsg;
2874 }
2875 }
2876 }
2877
2878 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2879 {
2880 ThreadMSG* tmsg = nullptr;
2881
2882 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
2883 unixDie("read from thread pipe returned wrong size or error");
2884 }
2885
2886 void *resp=0;
2887 try {
2888 resp = tmsg->func();
2889 }
2890 catch(std::exception& e) {
2891 if(g_logCommonErrors)
2892 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2893 }
2894 catch(PDNSException& e) {
2895 if(g_logCommonErrors)
2896 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2897 }
2898 if(tmsg->wantAnswer) {
2899 const auto& threadInfo = s_threadInfos.at(t_id);
2900 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2901 delete tmsg;
2902 unixDie("write to thread pipe returned wrong size or error");
2903 }
2904 }
2905
2906 delete tmsg;
2907 }
2908
2909 template<class T> void *voider(const boost::function<T*()>& func)
2910 {
2911 return func();
2912 }
2913
2914 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2915 {
2916 a.insert(a.end(), b.begin(), b.end());
2917 return a;
2918 }
2919
/* Append every (string, number) pair of `b` at the end of `a`, and return `a`. */
std::vector<std::pair<std::string, uint16_t>>& operator+=(std::vector<std::pair<std::string, uint16_t>>& a, const std::vector<std::pair<std::string, uint16_t>>& b)
{
  for (const auto& entry : b) {
    a.push_back(entry);
  }
  return a;
}
2925
2926 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2927 {
2928 a.insert(a.end(), b.begin(), b.end());
2929 return a;
2930 }
2931
2932
2933 /*
2934 This function should only be called by the handler to gather metrics, wipe the cache,
2935 reload the Lua script (not the Lua config) or change the current trace regex,
2936 and by the SNMP thread to gather metrics. */
2937 template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
2938 {
2939 if (!isHandlerThread()) {
2940 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
2941 exit(1);
2942 }
2943
2944 unsigned int n = 0;
2945 T ret=T();
2946 for (const auto& threadInfo : s_threadInfos) {
2947 if (n++ == t_id) {
2948 continue;
2949 }
2950
2951 const auto& tps = threadInfo.pipes;
2952 ThreadMSG* tmsg = new ThreadMSG();
2953 tmsg->func = boost::bind(voider<T>, func);
2954 tmsg->wantAnswer = true;
2955
2956 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2957 delete tmsg;
2958 unixDie("write to thread pipe returned wrong size or error");
2959 }
2960
2961 T* resp = nullptr;
2962 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2963 unixDie("read from thread pipe returned wrong size or error");
2964
2965 if(resp) {
2966 ret += *resp;
2967 delete resp;
2968 resp = nullptr;
2969 }
2970 }
2971 return ret;
2972 }
2973
2974 template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
2975 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
2976 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
2977 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
2978
2979 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
2980 {
2981 string remote;
2982 string msg=s_rcc.recv(&remote);
2983 RecursorControlParser rcp;
2984 RecursorControlParser::func_t* command;
2985
2986 string answer=rcp.getAnswer(msg, &command);
2987
2988 // If we are inside a chroot, we need to strip
2989 if (!arg()["chroot"].empty()) {
2990 size_t len = arg()["chroot"].length();
2991 remote = remote.substr(len);
2992 }
2993
2994 try {
2995 s_rcc.send(answer, &remote);
2996 command();
2997 }
2998 catch(std::exception& e) {
2999 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
3000 }
3001 catch(PDNSException& ae) {
3002 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
3003 }
3004 }
3005
3006 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
3007 {
3008 PacketID* pident=any_cast<PacketID>(&var);
3009 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3010
3011 shared_array<char> buffer(new char[pident->inNeeded]);
3012
3013 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
3014 if(ret > 0) {
3015 pident->inMSG.append(&buffer[0], &buffer[ret]);
3016 pident->inNeeded-=(size_t)ret;
3017 if(!pident->inNeeded || pident->inIncompleteOkay) {
3018 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3019 PacketID pid=*pident;
3020 string msg=pident->inMSG;
3021
3022 t_fdm->removeReadFD(fd);
3023 MT->sendEvent(pid, &msg);
3024 }
3025 else {
3026 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3027 }
3028 }
3029 else {
3030 PacketID tmp=*pident;
3031 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
3032 string empty;
3033 MT->sendEvent(tmp, &empty); // this conveys error status
3034 }
3035 }
3036
3037 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
3038 {
3039 PacketID* pid=any_cast<PacketID>(&var);
3040 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
3041 if(ret > 0) {
3042 pid->outPos+=(ssize_t)ret;
3043 if(pid->outPos==pid->outMSG.size()) {
3044 PacketID tmp=*pid;
3045 t_fdm->removeWriteFD(fd);
3046 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3047 }
3048 }
3049 else { // error or EOF
3050 PacketID tmp(*pid);
3051 t_fdm->removeWriteFD(fd);
3052 string sent;
3053 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
3054 }
3055 }
3056
3057 // resend event to everybody chained onto it
3058 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
3059 {
3060 if(iter->key.chain.empty())
3061 return;
3062 // cerr<<"doResends called!\n";
3063 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3064 resend.fd=-1;
3065 resend.id=*i;
3066 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3067
3068 MT->sendEvent(resend, &content);
3069 g_stats.chainResends++;
3070 }
3071 }
3072
3073 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
3074 {
3075 PacketID pid=any_cast<PacketID>(var);
3076 ssize_t len;
3077 std::string packet;
3078 packet.resize(g_outgoingEDNSBufsize);
3079 ComboAddress fromaddr;
3080 socklen_t addrlen=sizeof(fromaddr);
3081
3082 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
3083
3084 if(len < (ssize_t) sizeof(dnsheader)) {
3085 if(len < 0)
3086 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
3087 else {
3088 g_stats.serverParseError++;
3089 if(g_logCommonErrors)
3090 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
3091 ": packet smaller than DNS header"<<endl;
3092 }
3093
3094 t_udpclientsocks->returnSocket(fd);
3095 string empty;
3096
3097 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3098 if(iter != MT->d_waiters.end())
3099 doResends(iter, pid, empty);
3100
3101 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
3102 return;
3103 }
3104
3105 packet.resize(len);
3106 dnsheader dh;
3107 memcpy(&dh, &packet.at(0), sizeof(dh));
3108
3109 PacketID pident;
3110 pident.remote=fromaddr;
3111 pident.id=dh.id;
3112 pident.fd=fd;
3113
3114 if(!dh.qr && g_logCommonErrors) {
3115 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
3116 }
3117
3118 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3119 !dh.qr) { // one weird server
3120 pident.domain.clear();
3121 pident.type = 0;
3122 }
3123 else {
3124 try {
3125 if(len > 12)
3126 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
3127 }
3128 catch(std::exception& e) {
3129 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
3130 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
3131 return;
3132 }
3133 }
3134
3135 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3136 if(iter != MT->d_waiters.end()) {
3137 doResends(iter, pident, packet);
3138 }
3139
3140 retryWithName:
3141
3142 if(!MT->sendEvent(pident, &packet)) {
3143 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3144 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3145 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
3146 pident.domain == mthread->key.domain) {
3147 mthread->key.nearMisses++;
3148 }
3149
3150 // be a bit paranoid here since we're weakening our matching
3151 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
3152 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3153 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3154 pident.domain = mthread->key.domain;
3155 pident.type = mthread->key.type;
3156 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
3157 }
3158 }
3159 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3160 if(g_logCommonErrors) {
3161 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
3162 }
3163 }
3164 else if(fd >= 0) {
3165 t_udpclientsocks->returnSocket(fd);
3166 }
3167 }
3168
3169 FDMultiplexer* getMultiplexer()
3170 {
3171 FDMultiplexer* ret;
3172 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
3173 try {
3174 ret=i.second();
3175 return ret;
3176 }
3177 catch(FDMultiplexerException &fe) {
3178 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
3179 }
3180 catch(...) {
3181 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
3182 }
3183 }
3184 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
3185 exit(1);
3186 }
3187
3188
3189 static string* doReloadLuaScript()
3190 {
3191 string fname= ::arg()["lua-dns-script"];
3192 try {
3193 if(fname.empty()) {
3194 t_pdl.reset();
3195 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
3196 return new string("unloaded\n");
3197 }
3198 else {
3199 t_pdl = std::make_shared<RecursorLua4>();
3200 t_pdl->loadFile(fname);
3201 }
3202 }
3203 catch(std::exception& e) {
3204 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
3205 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
3206 }
3207
3208 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
3209 return new string("(re)loaded '"+fname+"'\n");
3210 }
3211
3212 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3213 {
3214 if(begin != end)
3215 ::arg().set("lua-dns-script") = *begin;
3216
3217 return broadcastAccFunction<string>(doReloadLuaScript);
3218 }
3219
3220 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
3221 try
3222 {
3223 if(newRegex.empty()) {
3224 t_traceRegex.reset();
3225 return new string("unset\n");
3226 }
3227 else {
3228 t_traceRegex = std::make_shared<Regex>(newRegex);
3229 return new string("ok\n");
3230 }
3231 }
3232 catch(PDNSException& ae)
3233 {
3234 return new string(ae.reason+"\n");
3235 }
3236
3237 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3238 {
3239 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3240 }
3241
3242 static void checkLinuxIPv6Limits()
3243 {
3244 #ifdef __linux__
3245 string line;
3246 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
3247 int lim=std::stoi(line);
3248 if(lim < 16384) {
3249 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
3250 }
3251 }
3252 #endif
3253 }
3254 static void checkOrFixFDS()
3255 {
3256 unsigned int availFDs=getFilenumLimit();
3257 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3258
3259 if(wantFDs > availFDs) {
3260 unsigned int hardlimit= getFilenumLimit(true);
3261 if(hardlimit >= wantFDs) {
3262 setFilenumLimit(wantFDs);
3263 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
3264 }
3265 else {
3266 int newval = (hardlimit - 25) / g_numWorkerThreads;
3267 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
3268 g_maxMThreads = newval;
3269 setFilenumLimit(hardlimit);
3270 }
3271 }
3272 }
3273
3274 static void* recursorThread(unsigned int tid, const string& threadName);
3275
3276 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
3277 {
3278 t_allowFrom = ng;
3279 return nullptr;
3280 }
3281
3282 int g_argc;
3283 char** g_argv;
3284
3285 void parseACLs()
3286 {
3287 static bool l_initialized;
3288
3289 if(l_initialized) { // only reload configuration file on second call
3290 string configname=::arg()["config-dir"]+"/recursor.conf";
3291 if(::arg()["config-name"]!="") {
3292 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3293 }
3294 cleanSlashes(configname);
3295
3296 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
3297 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
3298 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
3299 ::arg().preParseFile(configname.c_str(), "include-dir");
3300 ::arg().preParse(g_argc, g_argv, "include-dir");
3301
3302 // then process includes
3303 std::vector<std::string> extraConfigs;
3304 ::arg().gatherIncludes(extraConfigs);
3305
3306 for(const std::string& fn : extraConfigs) {
3307 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3308 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3309 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3310 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3311 }
3312
3313 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3314 ::arg().preParse(g_argc, g_argv, "allow-from");
3315 }
3316
3317 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3318 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3319
3320 if(!::arg()["allow-from-file"].empty()) {
3321 string line;
3322 ifstream ifs(::arg()["allow-from-file"].c_str());
3323 if(!ifs) {
3324 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3325 }
3326
3327 string::size_type pos;
3328 while(getline(ifs,line)) {
3329 pos=line.find('#');
3330 if(pos!=string::npos)
3331 line.resize(pos);
3332 trim(line);
3333 if(line.empty())
3334 continue;
3335
3336 allowFrom->addMask(line);
3337 }
3338 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
3339 }
3340 else if(!::arg()["allow-from"].empty()) {
3341 vector<string> ips;
3342 stringtok(ips, ::arg()["allow-from"], ", ");
3343
3344 g_log<<Logger::Warning<<"Only allowing queries from: ";
3345 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3346 allowFrom->addMask(*i);
3347 if(i!=ips.begin())
3348 g_log<<Logger::Warning<<", ";
3349 g_log<<Logger::Warning<<*i;
3350 }
3351 g_log<<Logger::Warning<<endl;
3352 }
3353 else {
3354 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3355 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
3356 allowFrom = nullptr;
3357 }
3358
3359 g_initialAllowFrom = allowFrom;
3360 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
3361 oldAllowFrom = nullptr;
3362
3363 l_initialized = true;
3364 }
3365
3366
3367 static void setupDelegationOnly()
3368 {
3369 vector<string> parts;
3370 stringtok(parts, ::arg()["delegation-only"], ", \t");
3371 for(const auto& p : parts) {
3372 SyncRes::addDelegationOnly(DNSName(p));
3373 }
3374 }
3375
3376 static std::map<unsigned int, std::set<int> > parseCPUMap()
3377 {
3378 std::map<unsigned int, std::set<int> > result;
3379
3380 const std::string value = ::arg()["cpu-map"];
3381
3382 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
3383 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
3384 return result;
3385 }
3386
3387 std::vector<std::string> parts;
3388
3389 stringtok(parts, value, " \t");
3390
3391 for(const auto& part : parts) {
3392 if (part.find('=') == string::npos)
3393 continue;
3394
3395 try {
3396 auto headers = splitField(part, '=');
3397 trim(headers.first);
3398 trim(headers.second);
3399
3400 unsigned int threadId = pdns_stou(headers.first);
3401 std::vector<std::string> cpus;
3402
3403 stringtok(cpus, headers.second, ",");
3404
3405 for(const auto& cpu : cpus) {
3406 int cpuId = std::stoi(cpu);
3407
3408 result[threadId].insert(cpuId);
3409 }
3410 }
3411 catch(const std::exception& e) {
3412 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
3413 }
3414 }
3415
3416 return result;
3417 }
3418
3419 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3420 {
3421 const auto& cpuMapping = cpusMap.find(n);
3422 if (cpuMapping != cpusMap.cend()) {
3423 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3424 if (rc == 0) {
3425 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
3426 for (const auto cpu : cpuMapping->second) {
3427 g_log<<Logger::Info<<" "<<cpu;
3428 }
3429 g_log<<Logger::Info<<endl;
3430 }
3431 else {
3432 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
3433 for (const auto cpu : cpuMapping->second) {
3434 g_log<<Logger::Info<<" "<<cpu;
3435 }
3436 g_log<<Logger::Info<<strerror(rc)<<endl;
3437 }
3438 }
3439 }
3440
3441 #ifdef NOD_ENABLED
3442 static void setupNODThread()
3443 {
3444 if (g_nodEnabled) {
3445 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3446 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
3447 try {
3448 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3449 }
3450 catch (const PDNSException& e) {
3451 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3452 _exit(1);
3453 }
3454 if (!t_nodDBp->init()) {
3455 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3456 _exit(1);
3457 }
3458 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
3459 t.detach();
3460 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
3461 }
3462 if (g_udrEnabled) {
3463 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3464 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
3465 try {
3466 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3467 }
3468 catch (const PDNSException& e) {
3469 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3470 _exit(1);
3471 }
3472 if (!t_udrDBp->init()) {
3473 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3474 _exit(1);
3475 }
3476 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
3477 t.detach();
3478 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
3479 }
3480 }
3481
3482 void parseNODWhitelist(const std::string& wlist)
3483 {
3484 vector<string> parts;
3485 stringtok(parts, wlist, ",; ");
3486 for(const auto& a : parts) {
3487 g_nodDomainWL.add(DNSName(a));
3488 }
3489 }
3490
3491 static void setupNODGlobal()
3492 {
3493 // Setup NOD subsystem
3494 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3495 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3496 g_nodLog = ::arg().mustDo("new-domain-log");
3497 parseNODWhitelist(::arg()["new-domain-whitelist"]);
3498
3499 // Setup Unique DNS Response subsystem
3500 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3501 g_udrLog = ::arg().mustDo("unique-response-log");
3502 }
3503 #endif /* NOD_ENABLED */
3504
3505 static int serviceMain(int argc, char*argv[])
3506 {
3507 g_log.setName(s_programname);
3508 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3509 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
3510
3511 if(!::arg()["logging-facility"].empty()) {
3512 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3513 if(val >= 0)
3514 g_log.setFacility(val);
3515 else
3516 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
3517 }
3518
3519 showProductVersion();
3520
3521 g_disthashseed=dns_random(0xffffffff);
3522
3523 checkLinuxIPv6Limits();
3524 try {
3525 vector<string> addrs;
3526 if(!::arg()["query-local-address6"].empty()) {
3527 SyncRes::s_doIPv6=true;
3528 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3529
3530 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3531 for(const string& addr : addrs) {
3532 g_localQueryAddresses6.push_back(ComboAddress(addr));
3533 }
3534 }
3535 else {
3536 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
3537 }
3538 addrs.clear();
3539 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3540 for(const string& addr : addrs) {
3541 g_localQueryAddresses4.push_back(ComboAddress(addr));
3542 }
3543 }
3544 catch(std::exception& e) {
3545 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
3546 exit(99);
3547 }
3548
3549 // keep this ABOVE loadRecursorLuaConfig!
3550 if(::arg()["dnssec"]=="off")
3551 g_dnssecmode=DNSSECMode::Off;
3552 else if(::arg()["dnssec"]=="process-no-validate")
3553 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3554 else if(::arg()["dnssec"]=="process")
3555 g_dnssecmode=DNSSECMode::Process;
3556 else if(::arg()["dnssec"]=="validate")
3557 g_dnssecmode=DNSSECMode::ValidateAll;
3558 else if(::arg()["dnssec"]=="log-fail")
3559 g_dnssecmode=DNSSECMode::ValidateForLog;
3560 else {
3561 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
3562 exit(1);
3563 }
3564
3565 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3566 if (g_signatureInceptionSkew < 0) {
3567 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3568 exit(1);
3569 }
3570
3571 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
3572 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
3573
3574 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3575 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
3576
3577 luaConfigDelayedThreads delayedLuaThreads;
3578 try {
3579 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
3580 }
3581 catch (PDNSException &e) {
3582 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
3583 exit(1);
3584 }
3585
3586 parseACLs();
3587 initPublicSuffixList(::arg()["public-suffix-list-file"]);
3588
3589 if(!::arg()["dont-query"].empty()) {
3590 vector<string> ips;
3591 stringtok(ips, ::arg()["dont-query"], ", ");
3592 ips.push_back("0.0.0.0");
3593 ips.push_back("::");
3594
3595 g_log<<Logger::Warning<<"Will not send queries to: ";
3596 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3597 SyncRes::addDontQuery(*i);
3598 if(i!=ips.begin())
3599 g_log<<Logger::Warning<<", ";
3600 g_log<<Logger::Warning<<*i;
3601 }
3602 g_log<<Logger::Warning<<endl;
3603 }
3604
3605 g_quiet=::arg().mustDo("quiet");
3606
3607 /* this needs to be done before parseACLs(), which call broadcastFunction() */
3608 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3609 if(g_weDistributeQueries) {
3610 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
3611 }
3612
3613 setupDelegationOnly();
3614 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
3615
3616 if(::arg()["trace"]=="fail") {
3617 SyncRes::setDefaultLogMode(SyncRes::Store);
3618 }
3619 else if(::arg().mustDo("trace")) {
3620 SyncRes::setDefaultLogMode(SyncRes::Log);
3621 ::arg().set("quiet")="no";
3622 g_quiet=false;
3623 g_dnssecLOG=true;
3624 }
3625 string myHostname = getHostname();
3626 if (myHostname == "UNKNOWN"){
3627 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3628 myHostname = "";
3629 }
3630
3631 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3632
3633 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3634
3635 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
3636 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
3637 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
3638 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
3639 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3640 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3641 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
3642 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3643 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
3644 SyncRes::s_serverID=::arg()["server-id"];
3645 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3646 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3647 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3648 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3649 if(SyncRes::s_serverID.empty()) {
3650 SyncRes::s_serverID = myHostname;
3651 }
3652
3653 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3654 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3655 SyncRes::clearECSStats();
3656
3657 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3658 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3659 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3660 }
3661 else {
3662 bool found = false;
3663 for (const auto& addr : g_localQueryAddresses4) {
3664 if (!IsAnyAddress(addr)) {
3665 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3666 found = true;
3667 break;
3668 }
3669 }
3670 if (!found) {
3671 for (const auto& addr : g_localQueryAddresses6) {
3672 if (!IsAnyAddress(addr)) {
3673 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3674 found = true;
3675 break;
3676 }
3677 }
3678 if (!found) {
3679 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3680 }
3681 }
3682 }
3683
3684 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3685 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3686 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3687
3688 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
3689 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
3690
3691 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3692
3693 g_initialDomainMap = parseAuthAndForwards();
3694
3695 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3696
3697 g_logCommonErrors=::arg().mustDo("log-common-errors");
3698 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3699
3700 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3701 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3702
3703 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3704
3705 g_numDistributorThreads = ::arg().asNum("distributor-threads");
3706 g_numWorkerThreads = ::arg().asNum("threads");
3707 if (g_numWorkerThreads < 1) {
3708 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3709 g_numWorkerThreads = 1;
3710 }
3711
3712 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
3713 g_maxMThreads = ::arg().asNum("max-mthreads");
3714
3715 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3716
3717 g_statisticsInterval = ::arg().asNum("statistics-interval");
3718
3719 #ifdef SO_REUSEPORT
3720 g_reusePort = ::arg().mustDo("reuseport");
3721 #endif
3722
3723 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
3724
3725 if (g_reusePort) {
3726 if (g_weDistributeQueries) {
3727 /* first thread is the handler, then distributors */
3728 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3729 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3730 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3731 makeUDPServerSockets(deferredAdds);
3732 makeTCPServerSockets(deferredAdds, tcpSockets);
3733 }
3734 }
3735 else {
3736 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3737 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3738 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3739 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3740 makeUDPServerSockets(deferredAdds);
3741 makeTCPServerSockets(deferredAdds, tcpSockets);
3742 }
3743 }
3744 }
3745 else {
3746 std::set<int> tcpSockets;
3747 /* we don't have reuseport so we can only open one socket per
3748 listening addr:port and everyone will listen on it */
3749 makeUDPServerSockets(g_deferredAdds);
3750 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3751
3752 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3753 needs to listen to the shared sockets */
3754 if (g_weDistributeQueries) {
3755 /* first thread is the handler, then distributors */
3756 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3757 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3758 }
3759 }
3760 else {
3761 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3762 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3763 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3764 }
3765 }
3766 }
3767
3768 #ifdef NOD_ENABLED
3769 // Setup newly observed domain globals
3770 setupNODGlobal();
3771 #endif /* NOD_ENABLED */
3772
3773 int forks;
3774 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3775 if(!fork()) // we are child
3776 break;
3777 }
3778
3779 if(::arg().mustDo("daemon")) {
3780 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3781 g_log.toConsole(Logger::Critical);
3782 daemonize();
3783 }
3784 signal(SIGUSR1,usr1Handler);
3785 signal(SIGUSR2,usr2Handler);
3786 signal(SIGPIPE,SIG_IGN);
3787
3788 checkOrFixFDS();
3789
3790 #ifdef HAVE_LIBSODIUM
3791 if (sodium_init() == -1) {
3792 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3793 exit(99);
3794 }
3795 #endif
3796
3797 openssl_thread_setup();
3798 openssl_seed();
3799 /* setup rng before chroot */
3800 dns_random_init();
3801
3802 if(::arg()["server-id"].empty()) {
3803 ::arg().set("server-id") = myHostname;
3804 }
3805
3806 int newgid=0;
3807 if(!::arg()["setgid"].empty())
3808 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3809 int newuid=0;
3810 if(!::arg()["setuid"].empty())
3811 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3812
3813 Utility::dropGroupPrivs(newuid, newgid);
3814
3815 if (!::arg()["chroot"].empty()) {
3816 #ifdef HAVE_SYSTEMD
3817 char *ns;
3818 ns = getenv("NOTIFY_SOCKET");
3819 if (ns != nullptr) {
3820 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3821 exit(1);
3822 }
3823 #endif
3824 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3825 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3826 exit(1);
3827 }
3828 else
3829 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3830 }
3831
3832 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3833 if(!s_pidfname.empty())
3834 unlink(s_pidfname.c_str()); // remove possible old pid file
3835 writePid();
3836
3837 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3838
3839 Utility::dropUserPrivs(newuid);
3840 try {
3841 /* we might still have capabilities remaining, for example if we have been started as root
3842 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
3843 like CAP_NET_BIND_SERVICE.
3844 */
3845 dropCapabilities();
3846 }
3847 catch(const std::exception& e) {
3848 g_log<<Logger::Warning<<e.what()<<endl;
3849 }
3850
3851 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3852
3853 makeThreadPipes();
3854
3855 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3856 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3857 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3858 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
3859
3860 if (::arg().mustDo("snmp-agent")) {
3861 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"], ::arg().mustDo("snmp-enable-expensive-stats"));
3862 g_snmpAgent->run();
3863 }
3864
3865 int port = ::arg().asNum("udp-source-port-min");
3866 if(port < 1024 || port > 65535){
3867 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
3868 exit(99); // this isn't going to fix itself either
3869 }
3870 s_minUdpSourcePort = port;
3871 port = ::arg().asNum("udp-source-port-max");
3872 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
3873 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
3874 exit(99); // this isn't going to fix itself either
3875 }
3876 s_maxUdpSourcePort = port;
3877 std::vector<string> parts {};
3878 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
3879 for (const auto &part : parts)
3880 {
3881 port = std::stoi(part);
3882 if(port < 1024 || port > 65535){
3883 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
3884 exit(99); // this isn't going to fix itself either
3885 }
3886 s_avoidUdpSourcePorts.insert(port);
3887 }
3888
3889 unsigned int currentThreadId = 1;
3890 const auto cpusMap = parseCPUMap();
3891
3892 if(g_numThreads == 1) {
3893 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
3894 #ifdef HAVE_SYSTEMD
3895 sd_notify(0, "READY=1");
3896 #endif
3897
3898 /* This thread handles the web server, carbon, statistics and the control channel */
3899 auto& handlerInfos = s_threadInfos.at(0);
3900 handlerInfos.isHandler = true;
3901 handlerInfos.thread = std::thread(recursorThread, 0, "main");
3902
3903 setCPUMap(cpusMap, currentThreadId, pthread_self());
3904
3905 auto& infos = s_threadInfos.at(currentThreadId);
3906 infos.isListener = true;
3907 infos.isWorker = true;
3908 recursorThread(currentThreadId++, "worker");
3909 }
3910 else {
3911
3912 if (g_weDistributeQueries) {
3913 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
3914 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
3915 auto& infos = s_threadInfos.at(currentThreadId);
3916 infos.isListener = true;
3917 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
3918
3919 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3920 }
3921 }
3922
3923 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
3924
3925 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
3926 auto& infos = s_threadInfos.at(currentThreadId);
3927 infos.isListener = g_weDistributeQueries ? false : true;
3928 infos.isWorker = true;
3929 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
3930
3931 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3932 }
3933
3934 #ifdef HAVE_SYSTEMD
3935 sd_notify(0, "READY=1");
3936 #endif
3937
3938 /* This thread handles the web server, carbon, statistics and the control channel */
3939 auto& infos = s_threadInfos.at(0);
3940 infos.isHandler = true;
3941 infos.thread = std::thread(recursorThread, 0, "web+stat");
3942
3943 s_threadInfos.at(0).thread.join();
3944 }
3945 return 0;
3946 }
3947
3948 static void* recursorThread(unsigned int n, const string& threadName)
3949 try
3950 {
3951 t_id=n;
3952 auto& threadInfo = s_threadInfos.at(t_id);
3953
3954 static string threadPrefix = "pdns-r/";
3955 setThreadName(threadPrefix + threadName);
3956
3957 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3958 SyncRes::setDomainMap(g_initialDomainMap);
3959 t_allowFrom = g_initialAllowFrom;
3960 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3961 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
3962 primeHints();
3963
3964 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3965
3966 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3967
3968 #ifdef NOD_ENABLED
3969 if (threadInfo.isWorker)
3970 setupNODThread();
3971 #endif /* NOD_ENABLED */
3972
3973 /* the listener threads handle TCP queries */
3974 if(threadInfo.isWorker || threadInfo.isListener) {
3975 try {
3976 if(!::arg()["lua-dns-script"].empty()) {
3977 t_pdl = std::make_shared<RecursorLua4>();
3978 t_pdl->loadFile(::arg()["lua-dns-script"]);
3979 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3980 }
3981 }
3982 catch(std::exception &e) {
3983 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3984 _exit(99);
3985 }
3986 }
3987
3988 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
3989 if(ringsize) {
3990 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3991 if(g_weDistributeQueries)
3992 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
3993 else
3994 t_remotes->set_capacity(ringsize);
3995 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3996 t_servfailremotes->set_capacity(ringsize);
3997 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3998 t_bogusremotes->set_capacity(ringsize);
3999 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4000 t_largeanswerremotes->set_capacity(ringsize);
4001 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4002 t_timeouts->set_capacity(ringsize);
4003
4004 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4005 t_queryring->set_capacity(ringsize);
4006 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4007 t_servfailqueryring->set_capacity(ringsize);
4008 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4009 t_bogusqueryring->set_capacity(ringsize);
4010 }
4011
4012 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
4013
4014 #ifdef HAVE_PROTOBUF
4015 /* start protobuf export threads if needed */
4016 auto luaconfsLocal = g_luaconfs.getLocal();
4017 checkProtobufExport(luaconfsLocal);
4018 checkOutgoingProtobufExport(luaconfsLocal);
4019 #endif /* HAVE_PROTOBUF */
4020
4021 PacketID pident;
4022
4023 t_fdm=getMultiplexer();
4024
4025 if(threadInfo.isHandler) {
4026 if(::arg().mustDo("webserver")) {
4027 g_log<<Logger::Warning << "Enabling web server" << endl;
4028 try {
4029 new RecursorWebServer(t_fdm);
4030 }
4031 catch(PDNSException &e) {
4032 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
4033 exit(99);
4034 }
4035 }
4036 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
4037 }
4038 else {
4039
4040 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4041 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4042
4043 if (threadInfo.isListener) {
4044 if (g_reusePort) {
4045 /* then every listener has its own FDs */
4046 for(const auto deferred : threadInfo.deferredAdds) {
4047 t_fdm->addReadFD(deferred.first, deferred.second);
4048 }
4049 }
4050 else {
4051 /* otherwise all listeners are listening on the same ones */
4052 for(const auto deferred : g_deferredAdds) {
4053 t_fdm->addReadFD(deferred.first, deferred.second);
4054 }
4055 }
4056 }
4057 }
4058
4059 registerAllStats();
4060
4061 if(threadInfo.isHandler) {
4062 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4063 }
4064
4065 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4066
4067 bool listenOnTCP(true);
4068
4069 time_t last_stat = 0;
4070 time_t last_carbon=0, last_lua_maintenance=0;
4071 time_t carbonInterval=::arg().asNum("carbon-interval");
4072 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
4073 counter.store(0); // used to periodically execute certain tasks
4074 for(;;) {
4075 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
4076
4077 if(!(counter%500)) {
4078 MT->makeThread(houseKeeping, 0);
4079 }
4080
4081 if(!(counter%55)) {
4082 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
4083 expired_t expired=t_fdm->getTimeouts(g_now);
4084
4085 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
4086 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4087 if(g_logCommonErrors)
4088 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4089 t_fdm->removeReadFD(i->first);
4090 }
4091 }
4092
4093 counter++;
4094
4095 if(threadInfo.isHandler) {
4096 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4097 doStats();
4098 last_stat = g_now.tv_sec;
4099 }
4100
4101 Utility::gettimeofday(&g_now, 0);
4102
4103 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4104 MT->makeThread(doCarbonDump, 0);
4105 last_carbon = g_now.tv_sec;
4106 }
4107 }
4108 if (t_pdl != nullptr) {
4109 // lua-dns-script directive is present, call the maintenance callback if needed
4110 /* remember that the listener threads handle TCP queries */
4111 if (threadInfo.isWorker || threadInfo.isListener) {
4112 // Only on threads processing queries
4113 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4114 t_pdl->maintenance();
4115 last_lua_maintenance = g_now.tv_sec;
4116 }
4117 }
4118 }
4119
4120 t_fdm->run(&g_now);
4121 // 'run' updates g_now for us
4122
4123 if(threadInfo.isListener) {
4124 if(listenOnTCP) {
4125 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4126 for(const auto fd : threadInfo.tcpSockets) {
4127 t_fdm->removeReadFD(fd);
4128 }
4129 listenOnTCP=false;
4130 }
4131 }
4132 else {
4133 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4134 for(const auto fd : threadInfo.tcpSockets) {
4135 t_fdm->addReadFD(fd, handleNewTCPQuestion);
4136 }
4137 listenOnTCP=true;
4138 }
4139 }
4140 }
4141 }
4142 }
4143 catch(PDNSException &ae) {
4144 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4145 return 0;
4146 }
4147 catch(std::exception &e) {
4148 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4149 return 0;
4150 }
4151 catch(...) {
4152 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4153 return 0;
4154 }
4155
4156
4157 int main(int argc, char **argv)
4158 {
4159 g_argc = argc;
4160 g_argv = argv;
4161 g_stats.startupTime=time(0);
4162 versionSetProduct(ProductRecursor);
4163 reportBasicTypes();
4164 reportOtherTypes();
4165
4166 int ret = EXIT_SUCCESS;
4167
4168 try {
4169 ::arg().set("stack-size","stack size per mthread")="200000";
4170 ::arg().set("soa-minimum-ttl","Don't change")="0";
4171 ::arg().set("no-shuffle","Don't change")="off";
4172 ::arg().set("local-port","port to listen on")="53";
4173 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4174 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4175 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4176 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4177 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4178 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4179 ::arg().set("daemon","Operate as a daemon")="no";
4180 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4181 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4182 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4183 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4184 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4185 ::arg().set("chroot","switch to chroot jail")="";
4186 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4187 ::arg().set("setuid","If set, change user id to this uid for more security")="";
4188 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4189 ::arg().set("threads", "Launch this number of threads")="2";
4190 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4191 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4192 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4193 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4194 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4195 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4196 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4197 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4198 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4199 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4200 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4201 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4202 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4203 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4204 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4205
4206 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4207 ::arg().set("quiet","Suppress logging of questions and answers")="";
4208 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4209 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
4210 ::arg().set("socket-owner","Owner of socket")="";
4211 ::arg().set("socket-group","Group of socket")="";
4212 ::arg().set("socket-mode", "Permissions for socket")="";
4213
4214 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
4215 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4216 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4217 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4218 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4219 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4220 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4221 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4222 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4223 ::arg().set("hint-file", "If set, load root hints from this file")="";
4224 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4225 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4226 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4227 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4228 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4229 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4230 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4231 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4232 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4233 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4234 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
4235 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4236 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4237 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4238 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4239 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4240 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4241 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4242 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4243 ::arg().set("lua-config-file", "More powerful configuration options")="";
4244
4245 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4246 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4247 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4248 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4249 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
4250 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
4251 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4252 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
4253 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
4254 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
4255 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
4256 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4257 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
4258 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
4259 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
4260 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
4261 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
4262 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4263 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
4264 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
4265 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
4266 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
4267 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4268 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
4269 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
4270 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
4271 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
4272 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
4273 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
4274
4275 ::arg().set("include-dir","Include *.conf files from this directory")="";
4276 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
4277
4278 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
4279
4280 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
4281 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
4282 ::arg().setSwitch("snmp-enable-expensive-stats", "If set and snmp-agent is set, even statistics whose reporting can have an impact on production will be enabled")="no";
4283
4284 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
4285 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
4286
4287 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4288
4289 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4290
4291 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
4292 ::arg().set("xpf-rr-code","XPF option code to use")="0";
4293
4294 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
4295 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4296 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
4297 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
4298 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
4299 #ifdef NOD_ENABLED
4300 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4301 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4302 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4303 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4304 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4305 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
4306 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
4307 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4308 ::arg().set("unique-response-log", "Log unique responses")="yes";
4309 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
4310 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
4311 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
4312 #endif /* NOD_ENABLED */
4313 ::arg().setCmd("help","Provide a helpful message");
4314 ::arg().setCmd("version","Print version string");
4315 ::arg().setCmd("config","Output blank configuration");
4316 g_log.toConsole(Logger::Info);
4317 ::arg().laxParse(argc,argv); // do a lax parse
4318
4319 string configname=::arg()["config-dir"]+"/recursor.conf";
4320 if(::arg()["config-name"]!="") {
4321 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4322 s_programname+="-"+::arg()["config-name"];
4323 }
4324 cleanSlashes(configname);
4325
4326 if(!::arg().getCommands().empty()) {
4327 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4328 exit(99);
4329 }
4330
4331 if(::arg().mustDo("config")) {
4332 cout<<::arg().configstring()<<endl;
4333 exit(0);
4334 }
4335
4336 if(!::arg().file(configname.c_str()))
4337 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
4338
4339 ::arg().parse(argc,argv);
4340
4341 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4342 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
4343 exit(EXIT_FAILURE);
4344 }
4345
4346 if (::arg()["socket-dir"].empty()) {
4347 if (::arg()["chroot"].empty())
4348 ::arg().set("socket-dir") = LOCALSTATEDIR;
4349 else
4350 ::arg().set("socket-dir") = "/";
4351 }
4352
4353 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
4354
4355 if(::arg().asNum("threads")==1) {
4356 if (::arg().mustDo("pdns-distributes-queries")) {
4357 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4358 ::arg().set("pdns-distributes-queries")="no";
4359 }
4360 }
4361
4362 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4363 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4364 ::arg().set("distributor-threads")="1";
4365 }
4366
4367 if (!::arg().mustDo("pdns-distributes-queries")) {
4368 ::arg().set("distributor-threads")="0";
4369 }
4370
4371 if(::arg().mustDo("help")) {
4372 cout<<"syntax:"<<endl<<endl;
4373 cout<<::arg().helpstring(::arg()["help"])<<endl;
4374 exit(0);
4375 }
4376 if(::arg().mustDo("version")) {
4377 showProductVersion();
4378 showBuildConfiguration();
4379 exit(0);
4380 }
4381
4382 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
4383
4384 if (logUrgency < Logger::Error)
4385 logUrgency = Logger::Error;
4386 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4387 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4388 }
4389 g_log.setLoglevel(logUrgency);
4390 g_log.toConsole(logUrgency);
4391
4392 serviceMain(argc, argv);
4393 }
4394 catch(PDNSException &ae) {
4395 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4396 ret=EXIT_FAILURE;
4397 }
4398 catch(std::exception &e) {
4399 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4400 ret=EXIT_FAILURE;
4401 }
4402 catch(...) {
4403 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4404 ret=EXIT_FAILURE;
4405 }
4406
4407 return ret;
4408 }