]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
Qname minimization.
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
31 #endif
32 #include "ws-recursor.hh"
33 #include <thread>
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
36 #include "utility.hh"
37 #include "dns_random.hh"
38 #ifdef HAVE_LIBSODIUM
39 #include <sodium.h>
40 #endif
41 #include "opensslsigners.hh"
42 #include <iostream>
43 #include <errno.h>
44 #include <boost/static_assert.hpp>
45 #include <map>
46 #include <set>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
49 #include <stdio.h>
50 #include <signal.h>
51 #include <stdlib.h>
52 #include "misc.hh"
53 #include "mtasker.hh"
54 #include <utility>
55 #include "arguments.hh"
56 #include "syncres.hh"
57 #include <fcntl.h>
58 #include <fstream>
59 #include "sortlist.hh"
60 #include "sstuff.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
66 #ifdef MALLOC_TRACE
67 #include "malloctrace.hh"
68 #endif
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
76 #include "logger.hh"
77 #include "iputils.hh"
78 #include "mplexer.hh"
79 #include "config.h"
80 #include "lua-recursor4.hh"
81 #include "version.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
84 #include "dnsname.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
90 #include "gettime.hh"
91 #include "pubsuffix.hh"
92 #ifdef NOD_ENABLED
93 #include "nod.hh"
94 #endif /* NOD_ENABLED */
95
96 #include "rec-protobuf.hh"
97 #include "rec-snmp.hh"
98
99 #ifdef HAVE_SYSTEMD
100 #include <systemd/sd-daemon.h>
101 #endif
102
103 #include "namespaces.hh"
104
105 #ifdef HAVE_PROTOBUF
106 #include "uuid-utils.hh"
107 #endif /* HAVE_PROTOBUF */
108
109 #include "xpf.hh"
110
// Maps a client address to a counter. TCPConnection increments the entry for its
// remote in its constructor and decrements it in its destructor.
// NOTE(review): addressOnlyLessThan presumably orders by address while ignoring the port -- confirm in iputils.hh.
typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;

// Per-thread state that is private to this file.
static thread_local std::shared_ptr<RecursorLua4> t_pdl;
// Recursor-level id of the current thread, returned by getRecursorThreadId();
// isHandlerThread() treats the value 0 as "handler".
static thread_local unsigned int t_id = 0;
static thread_local std::shared_ptr<Regex> t_traceRegex;
// Per-thread table of TCP connection counts per client, see tcpClientCounts_t.
static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
#ifdef HAVE_PROTOBUF
// Protobuf loggers of this thread plus the Lua configuration generation they were
// started for; checkProtobufExport() / checkOutgoingProtobufExport() restart them
// when the stored generation is older than the current configuration.
static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
static thread_local uint64_t t_protobufServersGeneration;
static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
static thread_local uint64_t t_outgoingProtobufServersGeneration;
#endif /* HAVE_PROTOBUF */

#ifdef HAVE_FSTRM
// Same scheme as the protobuf loggers above, managed by checkFrameStreamExport().
static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
static thread_local uint64_t t_frameStreamServersGeneration;
#endif /* HAVE_FSTRM */

// Per-thread state with external linkage.
thread_local std::unique_ptr<MT_t> MT; // the big MTasker
thread_local std::unique_ptr<MemRecursorCache> t_RC;
thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
// Multiplexer of the current thread; the async helpers in this file register their
// read/write callbacks on it.
thread_local FDMultiplexer* t_fdm{nullptr};
// Ring buffers filled by updateResponseStats() (large answers, servfails); each may be unset.
thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
#ifdef NOD_ENABLED
thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
#endif /* NOD_ENABLED */
__thread struct timeval g_now; // timestamp, updated (too) frequently

// List of (int, callback) pairs; RecThreadInfo::deferredAdds and g_deferredAdds use it
// for listening sockets (the int is presumably the file descriptor -- confirm at the call sites).
typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
143
// for communicating with our threads
// effectively readonly after startup
struct RecThreadInfo
{
  // File descriptors of the pipes associated with one thread; -1 means "not set".
  struct ThreadPipeSet
  {
    int writeToThread{-1};
    int readToThread{-1};
    int writeFromThread{-1};
    int readFromThread{-1};
    int writeQueriesToThread{-1}; // this one is non-blocking
    int readQueriesToThread{-1};
  };

  /* FD corresponding to TCP sockets this thread is listening
     on.
     These FDs are also in deferredAdds when we have one
     socket per listener, and in g_deferredAdds otherwise. */
  std::set<int> tcpSockets;
  /* FD corresponding to listening sockets if we have one socket per
     listener (with reuseport), otherwise all listeners share the
     same FD and g_deferredAdds is then used instead */
  deferredAdd_t deferredAdds;
  struct ThreadPipeSet pipes;
  std::thread thread;
  /* non-owning pointer, null until assigned
     (presumably to the thread's MTasker -- confirm where it is set) */
  MT_t* mt{nullptr};
  uint64_t numberOfDistributedQueries{0};
  /* handle the web server, carbon, statistics and the control channel */
  bool isHandler{false};
  /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
  bool isListener{false};
  /* process queries */
  bool isWorker{false};
};
178
/* first we have the handler thread, t_id == 0 (some other
   helper threads like SNMP might have t_id == 0 as well)
   then the distributor threads if any
   and finally the workers */
static std::vector<RecThreadInfo> s_threadInfos;
/* without reuseport, all listeners share the same sockets */
static deferredAdd_t g_deferredAdds;

typedef vector<int> tcpListenSockets_t;
typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now

// Wildcard addresses: getQueryLocalAddress() falls back to these when the
// matching g_localQueryAddresses4 / g_localQueryAddresses6 list is empty.
static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
// Candidate source addresses for outgoing queries; getQueryLocalAddress() picks one at random.
static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
static AtomicCounter counter;
static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads need to be set up with this
static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new threads need to be set up with this
static NetmaskGroup g_XPFAcl;
static size_t g_tcpMaxQueriesPerConn;
static size_t s_maxUDPQueriesPerRound;
static uint64_t g_latencyStatSize;
static uint32_t g_disthashseed;
static unsigned int g_maxTCPPerClient;
static unsigned int g_maxMThreads;
static unsigned int g_numDistributorThreads;
static unsigned int g_numWorkerThreads;
static int g_tcpTimeout;
static uint16_t g_udpTruncationThreshold;
static uint16_t g_xpfRRCode{0};
static std::atomic<bool> statsWanted;
static std::atomic<bool> g_quiet;
static bool g_logCommonErrors;
static bool g_anyToTcp;
static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
static bool g_reusePort{false};
static bool g_gettagNeedsEDNSOptions{false};
static time_t g_statisticsInterval;
static bool g_useIncomingECS;
static bool g_useKernelTimestamp;
std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
#ifdef NOD_ENABLED
static bool g_nodEnabled;
static DNSName g_nodLookupDomain;
static bool g_nodLog;
static SuffixMatchNode g_nodDomainWL;
static std::string g_nod_pbtag;
static bool g_udrEnabled;
static bool g_udrLog;
static std::string g_udr_pbtag;
#endif /* NOD_ENABLED */
// Source ports that UDPClientSocks::makeClientSocket() never picks for outgoing queries.
#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
#else
static std::set<uint16_t> s_avoidUdpSourcePorts;
#endif
// Bounds of the range UDPClientSocks::makeClientSocket() draws random source ports from.
static uint16_t s_minUdpSourcePort;
static uint16_t s_maxUdpSourcePort;
static double s_balancingFactor;

RecursorControlChannel s_rcc; // only active in the handler thread
RecursorStats g_stats;
string s_programname="pdns_recursor";
string s_pidfname; // file that writePid() appends the process id to
bool g_lowercaseOutgoing;
// Timeout handed to MT->waitEvent() by the asynchronous send/receive helpers in this file.
unsigned int g_networkTimeoutMsec;
unsigned int g_numThreads;
uint16_t g_outgoingEDNSBufsize;
bool g_logRPZChanges{false};

// Used in the Syncres to not throttle certain servers
GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
252
// Comma-separated list of loopback, private, CGNAT and link-local ranges.
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// The same ranges as LOCAL_NETS, each prefixed with '!'. Keep both lists in sync.
#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Concatenation of LOCAL_NETS and BAD_NETS into one comma-separated string literal.
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
262
263 //! used to send information to a newborn mthread
264 struct DNSComboWriter {
265 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
266 {
267 }
268
269 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
270 {
271 }
272
273 void setRemote(const ComboAddress& sa)
274 {
275 d_remote=sa;
276 }
277
278 void setSource(const ComboAddress& sa)
279 {
280 d_source=sa;
281 }
282
283 void setLocal(const ComboAddress& sa)
284 {
285 d_local=sa;
286 }
287
288 void setDestination(const ComboAddress& sa)
289 {
290 d_destination=sa;
291 }
292
293 void setSocket(int sock)
294 {
295 d_socket=sock;
296 }
297
298 string getRemote() const
299 {
300 if (d_source == d_remote) {
301 return d_source.toStringWithPort();
302 }
303 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
304 }
305
306 MOADNSParser d_mdp;
307 struct timeval d_now;
308 /* Remote client, might differ from d_source
309 in case of XPF, in which case d_source holds
310 the IP of the client and d_remote of the proxy
311 */
312 ComboAddress d_remote;
313 ComboAddress d_source;
314 /* Destination address, might differ from
315 d_destination in case of XPF, in which case
316 d_destination holds the IP of the proxy and
317 d_local holds our own. */
318 ComboAddress d_local;
319 ComboAddress d_destination;
320 #ifdef HAVE_PROTOBUF
321 boost::uuids::uuid d_uuid;
322 string d_requestorId;
323 string d_deviceId;
324 struct timeval d_kernelTimestamp{0,0};
325 #endif
326 std::string d_query;
327 std::vector<std::string> d_policyTags;
328 LuaContext::LuaObject d_data;
329 EDNSSubnetOpts d_ednssubnet;
330 shared_ptr<TCPConnection> d_tcpConnection;
331 int d_socket;
332 unsigned int d_tag{0};
333 uint32_t d_qhash{0};
334 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
335 uint16_t d_ecsBegin{0};
336 uint16_t d_ecsEnd{0};
337 bool d_variable{false};
338 bool d_ecsFound{false};
339 bool d_ecsParsed{false};
340 bool d_tcp;
341 };
342
343 MT_t* getMT()
344 {
345 return MT ? MT.get() : nullptr;
346 }
347
348 ArgvMap &arg()
349 {
350 static ArgvMap theArg;
351 return theArg;
352 }
353
354 unsigned int getRecursorThreadId()
355 {
356 return t_id;
357 }
358
359 int getMTaskerTID()
360 {
361 return MT->getTid();
362 }
363
364 static bool isDistributorThread()
365 {
366 if (t_id == 0) {
367 return false;
368 }
369
370 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
371 }
372
373 static bool isHandlerThread()
374 {
375 if (t_id == 0) {
376 return true;
377 }
378
379 return s_threadInfos.at(t_id).isHandler;
380 }
381
382 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
383
384 // -1 is error, 0 is timeout, 1 is success
385 int asendtcp(const string& data, Socket* sock)
386 {
387 PacketID pident;
388 pident.sock=sock;
389 pident.outMSG=data;
390
391 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
392 string packet;
393
394 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
395
396 if(!ret || ret==-1) { // timeout
397 t_fdm->removeWriteFD(sock->getHandle());
398 }
399 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
400 return -1;
401 }
402 return ret;
403 }
404
405 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
406
407 // -1 is error, 0 is timeout, 1 is success
408 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
409 {
410 data.clear();
411 PacketID pident;
412 pident.sock=sock;
413 pident.inNeeded=len;
414 pident.inIncompleteOkay=incompleteOkay;
415 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
416
417 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
418 if(!ret || ret==-1) { // timeout
419 t_fdm->removeReadFD(sock->getHandle());
420 }
421 else if(data.empty()) {// error, EOF or other
422 return -1;
423 }
424
425 return ret;
426 }
427
428 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
429 {
430 PacketID pident=*any_cast<PacketID>(&var);
431 char resp[512];
432 ComboAddress fromaddr;
433 socklen_t addrlen=sizeof(fromaddr);
434
435 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
436 if (fromaddr != pident.remote) {
437 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
438
439 }
440
441 t_fdm->removeReadFD(fd);
442 if(ret >= 0) {
443 string data(resp, (size_t) ret);
444 MT->sendEvent(pident, &data);
445 }
446 else {
447 string empty;
448 MT->sendEvent(pident, &empty);
449 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
450 }
451 }
452 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
453 {
454 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
455 s.setNonBlocking();
456 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
457
458 s.bind(local);
459 s.connect(dest);
460 s.send(query);
461
462 PacketID pident;
463 pident.sock=&s;
464 pident.remote=dest;
465 pident.type=0;
466 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
467
468 string data;
469
470 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
471
472 if(!ret || ret==-1) { // timeout
473 t_fdm->removeReadFD(s.getHandle());
474 }
475 else if(data.empty()) {// error, EOF or other
476 // we could special case this
477 return data;
478 }
479 return data;
480 }
481
482 //! pick a random query local address
483 ComboAddress getQueryLocalAddress(int family, uint16_t port)
484 {
485 ComboAddress ret;
486 if(family==AF_INET) {
487 if(g_localQueryAddresses4.empty())
488 ret = g_local4;
489 else
490 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
491 ret.sin4.sin_port = htons(port);
492 }
493 else {
494 if(g_localQueryAddresses6.empty())
495 ret = g_local6;
496 else
497 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
498
499 ret.sin6.sin6_port = htons(port);
500 }
501 return ret;
502 }
503
504 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
505
506 static void setSocketBuffer(int fd, int optname, uint32_t size)
507 {
508 uint32_t psize=0;
509 socklen_t len=sizeof(psize);
510
511 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
512 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
513 return;
514 }
515
516 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
517 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
518 }
519
520
521 static void setSocketReceiveBuffer(int fd, uint32_t size)
522 {
523 setSocketBuffer(fd, SO_RCVBUF, size);
524 }
525
526 static void setSocketSendBuffer(int fd, uint32_t size)
527 {
528 setSocketBuffer(fd, SO_SNDBUF, size);
529 }
530
531
532 // you can ask this class for a UDP socket to send a query from
533 // this socket is not yours, don't even think about deleting it
534 // but after you call 'returnSocket' on it, don't assume anything anymore
535 class UDPClientSocks
536 {
537 unsigned int d_numsocks;
538 public:
539 UDPClientSocks() : d_numsocks(0)
540 {
541 }
542
543 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
544 int getSocket(const ComboAddress& toaddr, int* fd)
545 {
546 *fd=makeClientSocket(toaddr.sin4.sin_family);
547 if(*fd < 0) // temporary error - receive exception otherwise
548 return -2;
549
550 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
551 int err = errno;
552 try {
553 closesocket(*fd);
554 }
555 catch(const PDNSException& e) {
556 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
557 }
558
559 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
560 return -2;
561 return -1;
562 }
563
564 d_numsocks++;
565 return 0;
566 }
567
568 // return a socket to the pool, or simply erase it
569 void returnSocket(int fd)
570 {
571 try {
572 t_fdm->removeReadFD(fd);
573 }
574 catch(const FDMultiplexerException& e) {
575 // we sometimes return a socket that has not yet been assigned to t_fdm
576 }
577
578 try {
579 closesocket(fd);
580 }
581 catch(const PDNSException& e) {
582 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
583 }
584
585 --d_numsocks;
586 }
587
588 private:
589
590 // returns -1 for errors which might go away, throws for ones that won't
591 static int makeClientSocket(int family)
592 {
593 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
594
595 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
596 return ret;
597
598 if(ret<0)
599 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
600
601 // setCloseOnExec(ret); // we're not going to exec
602
603 int tries=10;
604 ComboAddress sin;
605 while(--tries) {
606 uint16_t port;
607
608 if(tries==1) // fall back to kernel 'random'
609 port = 0;
610 else {
611 do {
612 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
613 }
614 while (s_avoidUdpSourcePorts.count(port));
615 }
616
617 sin=getQueryLocalAddress(family, port); // does htons for us
618
619 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
620 break;
621 }
622
623 if(!tries) {
624 closesocket(ret);
625 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
626 }
627
628 try {
629 setReceiveSocketErrors(ret, family);
630 setNonBlocking(ret);
631 }
632 catch(...) {
633 closesocket(ret);
634 throw;
635 }
636
637 return ret;
638 }
639 };
640
641 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
642
643 /* these two functions are used by LWRes */
644 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
645 int asendto(const char *data, size_t len, int flags,
646 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
647 {
648
649 PacketID pident;
650 pident.domain = domain;
651 pident.remote = toaddr;
652 pident.type = qtype;
653
654 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
655 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
656
657 for(; chain.first != chain.second; chain.first++) {
658 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
659 /*
660 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
661 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
662 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
663 */
664 chain.first->key.chain.insert(id); // we can chain
665 *fd=-1; // gets used in waitEvent / sendEvent later on
666 return 1;
667 }
668 }
669
670 int ret=t_udpclientsocks->getSocket(toaddr, fd);
671 if(ret < 0)
672 return ret;
673
674 pident.fd=*fd;
675 pident.id=id;
676
677 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
678 ret = send(*fd, data, len, 0);
679
680 int tmp = errno;
681
682 if(ret < 0)
683 t_udpclientsocks->returnSocket(*fd);
684
685 errno = tmp; // this is for logging purposes only
686 return ret;
687 }
688
689 // -1 is error, 0 is timeout, 1 is success
690 int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
691 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
692 {
693 static optional<unsigned int> nearMissLimit;
694 if(!nearMissLimit)
695 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
696
697 PacketID pident;
698 pident.fd=fd;
699 pident.id=id;
700 pident.domain=domain;
701 pident.type = qtype;
702 pident.remote=fromaddr;
703
704 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
705
706 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
707 if(ret > 0) {
708 /* handleUDPServerResponse() will close the socket for us no matter what */
709 if(packet.empty()) // means "error"
710 return -1;
711
712 *d_len=packet.size();
713
714 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
715 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
716 g_stats.spoofCount++;
717 return -1;
718 }
719 }
720 else {
721 /* getting there means error or timeout, it's up to us to close the socket */
722 if(fd >= 0)
723 t_udpclientsocks->returnSocket(fd);
724 }
725 return ret;
726 }
727
728 static void writePid(void)
729 {
730 if(!::arg().mustDo("write-pid"))
731 return;
732 ofstream of(s_pidfname.c_str(), std::ios_base::app);
733 if(of)
734 of<< Utility::getpid() <<endl;
735 else
736 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
737 }
738
739 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
740 {
741 ++s_currentConnections;
742 (*t_tcpClientCounts)[d_remote]++;
743 }
744
745 TCPConnection::~TCPConnection()
746 {
747 try {
748 if(closesocket(d_fd) < 0)
749 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
750 }
751 catch(const PDNSException& e) {
752 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
753 }
754
755 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
756 t_tcpClientCounts->erase(d_remote);
757 --s_currentConnections;
758 }
759
760 AtomicCounter TCPConnection::s_currentConnections;
761
762 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
763
764 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
765 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
766 {
767 if(packetsize > 1000 && t_largeanswerremotes)
768 t_largeanswerremotes->push_back(remote);
769 switch(res) {
770 case RCode::ServFail:
771 if(t_servfailremotes) {
772 t_servfailremotes->push_back(remote);
773 if(query && t_servfailqueryring) // packet cache
774 t_servfailqueryring->push_back(make_pair(*query, qtype));
775 }
776 g_stats.servFails++;
777 break;
778 case RCode::NXDomain:
779 g_stats.nxDomains++;
780 break;
781 case RCode::NoError:
782 g_stats.noErrors++;
783 break;
784 }
785 }
786
787 static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
788 try
789 {
790 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
791 }
792 catch(...)
793 {
794 return "Exception making error message for exception";
795 }
796
797 #ifdef HAVE_PROTOBUF
798 static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
799 {
800 if (!t_protobufServers) {
801 return;
802 }
803
804 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
805 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
806 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
807 message.setServerIdentity(SyncRes::s_serverID);
808 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
809 message.setRequestorId(requestorId);
810 message.setDeviceId(deviceId);
811
812 if (!policyTags.empty()) {
813 message.setPolicyTags(policyTags);
814 }
815
816 // cerr <<message.toDebugString()<<endl;
817 std::string str;
818 message.serialize(str);
819
820 for (auto& server : *t_protobufServers) {
821 server->queueData(str);
822 }
823 }
824
825 static void protobufLogResponse(const RecProtoBufMessage& message)
826 {
827 if (!t_protobufServers) {
828 return;
829 }
830
831 // cerr <<message.toDebugString()<<endl;
832 std::string str;
833 message.serialize(str);
834
835 for (auto& server : *t_protobufServers) {
836 server->queueData(str);
837 }
838 }
839 #endif
840
841 /**
842 * Chases the CNAME provided by the PolicyCustom RPZ policy.
843 *
844 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
845 * @param qtype: The QType of the original query
846 * @param sr: A SyncRes
847 * @param res: An integer that will contain the RCODE of the lookup we do
848 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
849 */
850 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
851 {
852 if (spoofed.d_type == QType::CNAME) {
853 bool oldWantsRPZ = sr.getWantsRPZ();
854 sr.setWantsRPZ(false);
855 vector<DNSRecord> ans;
856 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
857 for (const auto& rec : ans) {
858 if(rec.d_place == DNSResourceRecord::ANSWER) {
859 ret.push_back(rec);
860 }
861 }
862 // Reset the RPZ state of the SyncRes
863 sr.setWantsRPZ(oldWantsRPZ);
864 }
865 }
866
867 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
868 {
869 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
870
871 if(rec.d_type != QType::OPT) // their TTL ain't real
872 minTTL = min(minTTL, rec.d_ttl);
873
874 rec.d_content->toPacket(pw);
875 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
876 pw.rollback();
877 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
878 pw.getHeader()->tc=1;
879 pw.truncate();
880 }
881 return false;
882 }
883
884 return true;
885 }
886
887 #ifdef HAVE_PROTOBUF
888 static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
889 {
890 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
891
892 for (const auto& server : config.servers) {
893 try {
894 result->emplace_back(new RemoteLogger(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
895 }
896 catch(const std::exception& e) {
897 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
898 }
899 catch(const PDNSException& e) {
900 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
901 }
902 }
903
904 return result;
905 }
906
907 static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
908 {
909 if (!luaconfsLocal->protobufExportConfig.enabled) {
910 if (t_protobufServers) {
911 for (auto& server : *t_protobufServers) {
912 server->stop();
913 }
914 t_protobufServers.reset();
915 }
916
917 return false;
918 }
919
920 /* if the server was not running, or if it was running according to a
921 previous configuration */
922 if (!t_protobufServers ||
923 t_protobufServersGeneration < luaconfsLocal->generation) {
924
925 if (t_protobufServers) {
926 for (auto& server : *t_protobufServers) {
927 server->stop();
928 }
929 }
930 t_protobufServers.reset();
931
932 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
933 t_protobufServersGeneration = luaconfsLocal->generation;
934 }
935
936 return true;
937 }
938
939 static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
940 {
941 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
942 if (t_outgoingProtobufServers) {
943 for (auto& server : *t_outgoingProtobufServers) {
944 server->stop();
945 }
946 }
947 t_outgoingProtobufServers.reset();
948
949 return false;
950 }
951
952 /* if the server was not running, or if it was running according to a
953 previous configuration */
954 if (!t_outgoingProtobufServers ||
955 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
956
957 if (t_outgoingProtobufServers) {
958 for (auto& server : *t_outgoingProtobufServers) {
959 server->stop();
960 }
961 }
962 t_outgoingProtobufServers.reset();
963
964 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
965 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
966 }
967
968 return true;
969 }
970
971 #ifdef HAVE_FSTRM
972
973 static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
974 {
975 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
976
977 for (const auto& server : config.servers) {
978 try {
979 std::unordered_map<string,unsigned> options;
980 options["bufferHint"] = config.bufferHint;
981 options["flushTimeout"] = config.flushTimeout;
982 options["inputQueueSize"] = config.inputQueueSize;
983 options["outputQueueSize"] = config.outputQueueSize;
984 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
985 options["reopenInterval"] = config.reopenInterval;
986 FrameStreamLogger *fsl = nullptr;
987 try {
988 ComboAddress address(server);
989 fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
990 }
991 catch (const PDNSException& e) {
992 fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
993 }
994 fsl->setLogQueries(config.logQueries);
995 fsl->setLogResponses(config.logResponses);
996 result->emplace_back(fsl);
997 }
998 catch(const std::exception& e) {
999 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
1000 }
1001 catch(const PDNSException& e) {
1002 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
1003 }
1004 }
1005
1006 return result;
1007 }
1008
1009 static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1010 {
1011 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1012 if (t_frameStreamServers) {
1013 // dt's take care of cleanup
1014 t_frameStreamServers.reset();
1015 }
1016
1017 return false;
1018 }
1019
1020 /* if the server was not running, or if it was running according to a
1021 previous configuration */
1022 if (!t_frameStreamServers ||
1023 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1024
1025 if (t_frameStreamServers) {
1026 // dt's take care of cleanup
1027 t_frameStreamServers.reset();
1028 }
1029
1030 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1031 t_frameStreamServersGeneration = luaconfsLocal->generation;
1032 }
1033
1034 return true;
1035 }
1036 #endif /* HAVE_FSTRM */
1037 #endif /* HAVE_PROTOBUF */
1038
1039 #ifdef NOD_ENABLED
1040 static bool nodCheckNewDomain(const DNSName& dname)
1041 {
1042 static const QType qt(QType::A);
1043 static const uint16_t qc(QClass::IN);
1044 bool ret = false;
1045 // First check the (sub)domain isn't whitelisted for NOD purposes
1046 if (!g_nodDomainWL.check(dname)) {
1047 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
1048 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1049 if (g_nodLog) {
1050 // This should probably log to a dedicated log file
1051 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
1052 }
1053 if (!(g_nodLookupDomain.isRoot())) {
1054 // Send a DNS A query to <domain>.g_nodLookupDomain
1055 DNSName qname = dname;
1056 vector<DNSRecord> dummy;
1057 qname += g_nodLookupDomain;
1058 directResolve(qname, qt, qc, dummy);
1059 }
1060 ret = true;
1061 }
1062 }
1063 return ret;
1064 }
1065
1066 static void nodAddDomain(const DNSName& dname)
1067 {
1068 // Don't bother adding domains on the nod whitelist
1069 if (!g_nodDomainWL.check(dname)) {
1070 if (t_nodDBp) {
1071 // This keeps the nod info up to date
1072 t_nodDBp->addDomain(dname);
1073 }
1074 }
1075 }
1076
1077 static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1078 {
1079 bool ret = false;
1080 if (record.d_place == DNSResourceRecord::ANSWER ||
1081 record.d_place == DNSResourceRecord::ADDITIONAL) {
1082 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1083 std::stringstream ss;
1084 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1085 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
1086 if (g_udrLog) {
1087 // This should also probably log to a dedicated file.
1088 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
1089 }
1090 ret = true;
1091 }
1092 }
1093 return ret;
1094 }
1095 #endif /* NOD_ENABLED */
1096
/* Handles one client question from start to finish.

   `p` points to a DNSComboWriter; ownership is taken here (it is wrapped in a
   unique_ptr right away) so it is released on every exit path.

   Steps, in order:
   - parse the EDNS part of the query (version, UDP payload size, ECS, NSID)
   - set up the response packet writer and the SyncRes resolver
   - run the Lua hooks and the RPZ policies around sr.beginResolve()
   - look at the DNSSEC validation state to set the AD bit or turn the answer
     into a ServFail
   - write the records into the packet and add the OPT record
   - log the response over protobuf if enabled
   - send the answer over UDP (possibly inserting it into the packet cache) or TCP
   - update the latency statistics

   A 'Drop' policy returns without sending anything. Exceptions are caught at
   the end of the function and only logged. */
static void startDoResolve(void *p)
{
  auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
  try {
    if (t_queryring)
      t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));

    // Without EDNS, a UDP answer is limited to 512 bytes (less if the truncation threshold is lower)
    uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
    EDNSOpts edo;
    std::vector<pair<uint16_t, string> > ednsOpts;
    bool variableAnswer = dc->d_variable;
    bool haveEDNS=false;
#ifdef NOD_ENABLED
    bool hasUDR = false;
#endif /* NOD_ENABLED */
    DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
    uint8_t ednsExtRCode = 0;
    if(getEDNSOpts(dc->d_mdp, &edo)) {
      haveEDNS=true;
      if (edo.d_version != 0) {
        // unsupported EDNS version, answered with BADVERS without resolving (see the goto sendit below)
        ednsExtRCode = ERCode::BADVERS;
      }

      if(!dc->d_tcp) {
        /* rfc6891 6.2.3:
           "Values lower than 512 MUST be treated as equal to 512."
        */
        maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
      }
      ednsOpts = edo.d_options;
      maxanswersize -= 11; // EDNS header size

      for (const auto& o : edo.d_options) {
        if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
          dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
        } else if (o.first == EDNSOptionCode::NSID) {
          // read once, on first use
          const static string mode_server_id = ::arg()["server-id"];
          if(mode_server_id != "disabled" && !mode_server_id.empty() &&
             maxanswersize > (2 + 2 + mode_server_id.size())) {
            returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
            variableAnswer = true; // Can't packetcache an answer with NSID
            // Option Code and Option Length are both 2
            maxanswersize -= 2 + 2 + mode_server_id.size();
          }
        }
      }
    }
    /* perhaps there was no EDNS or no ECS but by now we looked */
    dc->d_ecsParsed = true;
    vector<DNSRecord> ret;
    vector<uint8_t> packet;

    auto luaconfsLocal = g_luaconfs.getLocal();
    // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
    bool wantsRPZ(true);
    boost::optional<RecProtoBufMessage> pbMessage(boost::none);
    bool logResponse = false;
#ifdef HAVE_PROTOBUF
    if (checkProtobufExport(luaconfsLocal)) {
      logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
      // the requestor address is masked before being put into the protobuf message
      Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
      const ComboAddress& requestor = requestorNM.getMaskedNetwork();
      pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
      pbMessage->setServerIdentity(SyncRes::s_serverID);
      pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
    }
#endif /* HAVE_PROTOBUF */

#ifdef HAVE_FSTRM
    checkFrameStreamExport(luaconfsLocal);
#endif

    DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);

    // response header: id, RD and CD are copied from the query
    pw.getHeader()->aa=0;
    pw.getHeader()->ra=1;
    pw.getHeader()->qr=1;
    pw.getHeader()->tc=0;
    pw.getHeader()->id=dc->d_mdp.d_header.id;
    pw.getHeader()->rd=dc->d_mdp.d_header.rd;
    pw.getHeader()->cd=dc->d_mdp.d_header.cd;

    /* This is the lowest TTL seen in the records of the response,
       so we can't cache it for longer than this value.
       If we have a TTL cap, this value can't be larger than the
       cap no matter what. */
    uint32_t minTTL = dc->d_ttlCap;

    SyncRes sr(dc->d_now);

    bool DNSSECOK=false;
    if(t_pdl) {
      sr.setLuaEngine(t_pdl);
    }
    if(g_dnssecmode != DNSSECMode::Off) {
      sr.setDoDNSSEC(true);

      // Does the requestor want DNSSEC records?
      if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
        DNSSECOK=true;
        g_stats.dnssecQueries++;
      }
      if (dc->d_mdp.d_header.cd) {
        /* Per rfc6840 section 5.9, "When processing a request with
           the Checking Disabled (CD) bit set, a resolver SHOULD attempt
           to return all response data, even data that has failed DNSSEC
           validation. */
        ++g_stats.dnssecCheckDisabledQueries;
      }
      if (dc->d_mdp.d_header.ad) {
        /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
           indicating that the requester understands and is interested in the
           value of the AD bit in the response. This allows a requester to
           indicate that it understands the AD bit without also requesting
           DNSSEC data via the DO bit. */
        ++g_stats.dnssecAuthenticDataQueries;
      }
    } else {
      // Ignore the client-set CD flag
      pw.getHeader()->cd=0;
    }
    // always validate in ValidateAll and ValidateForLog modes, only when the client asked (AD or DO) in Process mode
    sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));

#ifdef HAVE_PROTOBUF
    sr.setInitialRequestId(dc->d_uuid);
    sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
#endif
#ifdef HAVE_FSTRM
    sr.setFrameStreamServers(t_frameStreamServers);
#endif
    sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);

    bool tracedQuery=false; // we could consider letting Lua know about this too
    bool shouldNotValidate = false;

    /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
    int res = RCode::NoError;
    DNSFilterEngine::Policy appliedPolicy;
    std::vector<DNSRecord> spoofed;
    // the view of this question handed to the Lua hooks (prerpz, preresolve, nodata, nxdomain, postresolve)
    RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
    dq.ednsFlags = &edo.d_extFlags;
    dq.ednsOptions = &ednsOpts;
    dq.tag = dc->d_tag;
    dq.discardedPolicies = &sr.d_discardedPolicies;
    dq.policyTags = &dc->d_policyTags;
    dq.appliedPolicy = &appliedPolicy;
    dq.currentRecords = &ret;
    dq.dh = &dc->d_mdp.d_header;
    dq.data = dc->d_data;
#ifdef HAVE_PROTOBUF
    dq.requestorId = dc->d_requestorId;
    dq.deviceId = dc->d_deviceId;
#endif

    // an extended rcode was set while parsing EDNS (BADVERS): answer right away, no resolution
    if(ednsExtRCode != 0) {
      goto sendit;
    }

    // ANY over UDP gets an empty truncated answer when g_anyToTcp is set
    if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
      pw.getHeader()->tc = 1;
      res = 0;
      variableAnswer = true;
      goto sendit;
    }

    if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
      sr.setLogMode(SyncRes::Store);
      tracedQuery=true;
    }


    if(!g_quiet || tracedQuery) {
      g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
           <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
      if(!dc->d_ednssubnet.source.empty()) {
        g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
      }
      g_log<<endl;
    }

    sr.setId(MT->getTid());
    // without the RD bit we only answer from the cache
    if(!dc->d_mdp.d_header.rd)
      sr.setCacheOnly();

    if (t_pdl) {
      t_pdl->prerpz(dq, res);
    }

    // Check if the query has a policy attached to it
    if (wantsRPZ) {
      appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
    }

    // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
    if(!t_pdl || !t_pdl->preresolve(dq, res)) {

      sr.setWantsRPZ(wantsRPZ);
      if(wantsRPZ) {
        // apply the policy found for the query itself, before any resolution is attempted
        switch(appliedPolicy.d_kind) {
        case DNSFilterEngine::PolicyKind::NoAction:
          break;
        case DNSFilterEngine::PolicyKind::Drop:
          g_stats.policyDrops++;
          g_stats.policyResults[appliedPolicy.d_kind]++;
          // nothing is sent back to the client
          return;
        case DNSFilterEngine::PolicyKind::NXDOMAIN:
          g_stats.policyResults[appliedPolicy.d_kind]++;
          res=RCode::NXDomain;
          goto haveAnswer;
        case DNSFilterEngine::PolicyKind::NODATA:
          g_stats.policyResults[appliedPolicy.d_kind]++;
          res=RCode::NoError;
          goto haveAnswer;
        case DNSFilterEngine::PolicyKind::Custom:
          g_stats.policyResults[appliedPolicy.d_kind]++;
          res=RCode::NoError;
          spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
          for (const auto& dr : spoofed) {
            ret.push_back(dr);
            handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
          }
          goto haveAnswer;
        case DNSFilterEngine::PolicyKind::Truncate:
          // only meaningful over UDP, a TCP query goes on to be resolved
          if(!dc->d_tcp) {
            g_stats.policyResults[appliedPolicy.d_kind]++;
            res=RCode::NoError;
            pw.getHeader()->tc=1;
            goto haveAnswer;
          }
          break;
        }
      }

      // The query was not handled by a QNAME policy, now actually go out to find an answer
      try {
        res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
        shouldNotValidate = sr.wasOutOfBand();
      }
      catch(ImmediateServFailException &e) {
        if(g_logCommonErrors)
          g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
        res = RCode::ServFail;
      }

      dq.validationState = sr.getValidationState();

      // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
      if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
        appliedPolicy = sr.d_appliedPolicy;
        g_stats.policyResults[appliedPolicy.d_kind]++;
        switch(appliedPolicy.d_kind) {
        case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
          throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
        case DNSFilterEngine::PolicyKind::Drop:
          g_stats.policyDrops++;
          return;
        case DNSFilterEngine::PolicyKind::NXDOMAIN:
          ret.clear();
          res=RCode::NXDomain;
          goto haveAnswer;

        case DNSFilterEngine::PolicyKind::NODATA:
          ret.clear();
          res=RCode::NoError;
          goto haveAnswer;

        case DNSFilterEngine::PolicyKind::Truncate:
          if(!dc->d_tcp) {
            ret.clear();
            res=RCode::NoError;
            pw.getHeader()->tc=1;
            goto haveAnswer;
          }
          break;

        case DNSFilterEngine::PolicyKind::Custom:
          ret.clear();
          res=RCode::NoError;
          spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
          for (const auto& dr : spoofed) {
            ret.push_back(dr);
            handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
          }
          goto haveAnswer;
        }
      }

      // look for a policy matching the records we got back
      if (wantsRPZ) {
        appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
      }

      if(t_pdl) {
        if(res == RCode::NoError) {
          // the nodata hook is only called when no record of the requested type is in the answer section
          auto i=ret.cbegin();
          for(; i!= ret.cend(); ++i)
            if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
              break;
          if(i == ret.cend() && t_pdl->nodata(dq, res))
            shouldNotValidate = true;

        }
        else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
          shouldNotValidate = true;

        if(t_pdl->postresolve(dq, res))
          shouldNotValidate = true;
      }

      if (wantsRPZ) { //XXX This block is repeated, see above
        g_stats.policyResults[appliedPolicy.d_kind]++;
        switch(appliedPolicy.d_kind) {
        case DNSFilterEngine::PolicyKind::NoAction:
          break;
        case DNSFilterEngine::PolicyKind::Drop:
          g_stats.policyDrops++;
          return;
        case DNSFilterEngine::PolicyKind::NXDOMAIN:
          ret.clear();
          res=RCode::NXDomain;
          goto haveAnswer;

        case DNSFilterEngine::PolicyKind::NODATA:
          ret.clear();
          res=RCode::NoError;
          goto haveAnswer;

        case DNSFilterEngine::PolicyKind::Truncate:
          if(!dc->d_tcp) {
            ret.clear();
            res=RCode::NoError;
            pw.getHeader()->tc=1;
            goto haveAnswer;
          }
          break;

        case DNSFilterEngine::PolicyKind::Custom:
          ret.clear();
          res=RCode::NoError;
          spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
          for (const auto& dr : spoofed) {
            ret.push_back(dr);
            handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
          }
          goto haveAnswer;
        }
      }
    }
    /* from here on res and ret hold the answer, whether it comes from Lua, a policy or the resolution */
  haveAnswer:;
    // presumably set by one of the Lua hooks above, which are handed res — TODO confirm
    if(res == PolicyDecision::DROP) {
      g_stats.policyDrops++;
      return;
    }
    // dump the stored trace for traced queries and for failures
    if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
    {
      string trace(sr.getTrace());
      if(!trace.empty()) {
        vector<string> lines;
        boost::split(lines, trace, boost::is_any_of("\n"));
        for(const string& line : lines) {
          if(!line.empty())
            g_log<<Logger::Warning<< line << endl;
        }
      }
    }

    if(res == -1) {
      pw.getHeader()->rcode=RCode::ServFail;
      // no commit here, because no record
      g_stats.servFails++;
    }
    else {
      pw.getHeader()->rcode=res;

      // Does the validation mode or query demand validation?
      if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
        try {
          if(sr.doLog()) {
            g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
          }

          auto state = sr.getValidationState();

          if(state == Secure) {
            if(sr.doLog()) {
              g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
            }

            // Is the query source interested in the value of the ad-bit?
            if (dc->d_mdp.d_header.ad || DNSSECOK)
              pw.getHeader()->ad=1;
          }
          else if(state == Insecure) {
            if(sr.doLog()) {
              g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
            }

            pw.getHeader()->ad=0;
          }
          else if(state == Bogus) {
            if(t_bogusremotes)
              t_bogusremotes->push_back(dc->d_source);
            if(t_bogusqueryring)
              t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
            if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
              g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
            }

            // Does the query or the validation mode require sending out a SERVFAIL on validation errors?
            if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
              if(sr.doLog()) {
                g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
              }

              // none of the records are written to the packet in that case
              pw.getHeader()->rcode=RCode::ServFail;
              goto sendit;
            } else {
              if(sr.doLog()) {
                g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
              }
            }
          }
        }
        catch(ImmediateServFailException &e) {
          if(g_logCommonErrors)
            g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
          pw.getHeader()->rcode=RCode::ServFail;
          goto sendit;
        }
      }

      if(ret.size()) {
        orderAndShuffle(ret);
        // an answer sorted for this particular client must not be served to others from the packet cache
        if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
          stable_sort(ret.begin(), ret.end(), *sl);
          variableAnswer=true;
        }
      }

      bool needCommit = false;
      for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
        /* The client did not set the DO bit: skip NSEC3 records altogether, and
           skip RRSIG and NSEC records unless they are in the answer section and
           their type is what was asked for (or ANY was asked) */
        if( ! DNSSECOK &&
            ( i->d_type == QType::NSEC3 ||
              (
                ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
                (
                  ( dc->d_mdp.d_qtype != i->d_type &&  dc->d_mdp.d_qtype != QType::ANY ) ||
                  i->d_place != DNSResourceRecord::ANSWER
                )
              )
            )
          ) {
          continue;
        }

        // on failure we stop adding records and leave the pending ones uncommitted
        if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
          needCommit = false;
          break;
        }
        needCommit = true;

#ifdef NOD_ENABLED
        bool udr = false;
        if (g_udrEnabled) {
          udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
          if (!hasUDR && udr)
            hasUDR = true;
        }
#endif /* NOD ENABLED */

#ifdef HAVE_PROTOBUF
        if (t_protobufServers) {
#ifdef NOD_ENABLED
          pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
#else
          pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
#endif /* NOD_ENABLED */
        }
#endif
      }
      if(needCommit)
        pw.commit();
    }
    /* the packet content is final at this point, except for the OPT record added below */
  sendit:;

    if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
      //      cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
      EDNSSubnetOpts eo;
      eo.source = dc->d_ednssubnet.source;
      ComboAddress sa;
      sa.reset();
      sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
      eo.scope = Netmask(sa, 0);

      returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
    }

    if (haveEDNS) {
      /* we try to add the EDNS OPT RR even for truncated answers,
         as rfc6891 states:
         "The minimal response MUST be the DNS header, question section, and an
         OPT record.  This MUST also occur when a truncated response (using
         the DNS header's TC bit) is returned."
      */
      pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
      pw.commit();
    }

    g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
    updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
#ifdef NOD_ENABLED
    bool nod = false;
    if (g_nodEnabled) {
      if (nodCheckNewDomain(dc->d_mdp.d_qname))
        nod = true;
    }
#endif /* NOD_ENABLED */
#ifdef HAVE_PROTOBUF
    // with taggedOnly set, a response is only logged if a named policy was applied or if it has policy tags
    if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
      pbMessage->setBytes(packet.size());
      pbMessage->setResponseCode(pw.getHeader()->rcode);
      if (appliedPolicy.d_name) {
        pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
        pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
      }
      pbMessage->setPolicyTags(dc->d_policyTags);
      if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
        pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
      }
      else {
        pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
      }
      pbMessage->setRequestorId(dq.requestorId);
      pbMessage->setDeviceId(dq.deviceId);
#ifdef NOD_ENABLED
      if (g_nodEnabled) {
        if (nod) {
          pbMessage->setNOD(true);
          pbMessage->addPolicyTag(g_nod_pbtag);
        }
        if (hasUDR) {
          pbMessage->addPolicyTag(g_udr_pbtag);
        }
      }
#endif /* NOD_ENABLED */
      protobufLogResponse(*pbMessage);
#ifdef NOD_ENABLED
      /* undo the NOD/UDR markings once the message has been logged; the message
         is handed to the packet cache below, presumably so that cache hits are
         not reported as new — TODO confirm */
      if (g_nodEnabled) {
        pbMessage->setNOD(false);
        pbMessage->clearUDR();
        if (nod)
          pbMessage->removePolicyTag(g_nod_pbtag);
        if (hasUDR)
          pbMessage->removePolicyTag(g_udr_pbtag);
      }
#endif /* NOD_ENABLED */
    }
#endif
    if(!dc->d_tcp) {
      // UDP: send the packet as a single datagram
      struct msghdr msgh;
      struct iovec iov;
      char cbuf[256];
      fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
      msgh.msg_control=NULL;

      if(g_fromtosockets.count(dc->d_socket)) {
        addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
      }
      if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
        g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;

      if(variableAnswer || sr.wasVariable()) {
        g_stats.variableResponses++;
      }
      // only answers that do not vary per client go into the packet cache
      if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
        t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
                                            string((const char*)&*packet.begin(), packet.size()),
                                            g_now.tv_sec,
                                            pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
                                            min(minTTL,SyncRes::s_packetcachettl),
                                            dq.validationState,
                                            dc->d_ecsBegin,
                                            dc->d_ecsEnd,
                                            std::move(pbMessage));
      }
      //      else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
    }
    else {
      // TCP: the packet is preceded by its length, as two bytes in network order
      char buf[2];
      buf[0]=packet.size()/256;
      buf[1]=packet.size()%256;

      Utility::iovec iov[2];

      iov[0].iov_base=(void*)buf;              iov[0].iov_len=2;
      iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();

      int wret=Utility::writev(dc->d_socket, iov, 2);
      bool hadError=true;

      if(wret == 0)
        g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
      else if(wret < 0 )
        g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
      else if((unsigned int)wret != 2 + packet.size())
        g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
      else
        hadError=false;

      // update tcp connection status, either by closing or moving to 'BYTE0'

      if(hadError) {
        // no need to remove us from FDM, we weren't there
        dc->d_socket = -1;
      }
      else {
        dc->d_tcpConnection->queriesCount++;
        if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
          dc->d_socket = -1;
        }
        else {
          // wait for the next question on this connection, for at most g_tcpTimeout seconds
          dc->d_tcpConnection->state=TCPConnection::BYTE0;
          Utility::gettimeofday(&g_now, 0); // needs to be updated
          struct timeval ttd = g_now;
          ttd.tv_sec += g_tcpTimeout;

          t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
        }
      }
    }
    // time spent on this question, in seconds
    float spent=makeFloat(sr.getNow()-dc->d_now);
    if(!g_quiet) {
      g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
      g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
        sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
        sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;

      if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
        g_log<< ", dnssec="<<vStates[sr.getValidationState()];
      }

      g_log<<endl;

    }

    // a question answered without any outgoing or auth zone query counts as a cache hit
    if (sr.d_outqueries || sr.d_authzonequeries) {
      t_RC->cacheMisses++;
    }
    else {
      t_RC->cacheHits++;
    }

    // latency histogram, the buckets are in milliseconds
    if(spent < 0.001)
      g_stats.answers0_1++;
    else if(spent < 0.010)
      g_stats.answers1_10++;
    else if(spent < 0.1)
      g_stats.answers10_100++;
    else if(spent < 1.0)
      g_stats.answers100_1000++;
    else
      g_stats.answersSlow++;

    uint64_t newLat=(uint64_t)(spent*1000000);
    newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
    g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
    // no worries, we do this for packet cache hits elsewhere

    // total time minus the time accounted for in sr.d_totUsec (reported as "netw ms" above)
    auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
    if(ourtime < 1)
      g_stats.ourtime0_1++;
    else if(ourtime < 2)
      g_stats.ourtime1_2++;
    else if(ourtime < 4)
      g_stats.ourtime2_4++;
    else if(ourtime < 8)
      g_stats.ourtime4_8++;
    else if(ourtime < 16)
      g_stats.ourtime8_16++;
    else if(ourtime < 32)
      g_stats.ourtime16_32++;
    else {
      //      cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
      g_stats.ourtimeSlow++;
    }
    // a negative value is left out of the average
    if(ourtime >= 0.0) {
      newLat=ourtime*1000; // usec
      g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
    }
    //    cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
  }
  /* whatever went wrong is logged, and nothing is sent from these handlers */
  catch(PDNSException &ae) {
    g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
  }
  catch(const MOADNSException &mde) {
    g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
  }
  catch(std::exception& e) {
    g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();

    // Luawrapper nests the exception from Lua, so we unnest it here
    try {
      std::rethrow_if_nested(e);
    } catch(const std::exception& ne) {
      g_log<<". Extra info: "<<ne.what();
    } catch(...) {}

    g_log<<endl;
  }
  catch(...) {
    g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
  }

  // runs after both the normal path and the exception handlers (but not after an early return)
  g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
}
1811
1812 static void makeControlChannelSocket(int processNum=-1)
1813 {
1814 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1815 if(processNum >= 0)
1816 sockname += "."+std::to_string(processNum);
1817 sockname+=".controlsocket";
1818 s_rcc.listen(sockname);
1819
1820 int sockowner = -1;
1821 int sockgroup = -1;
1822
1823 if (!::arg().isEmpty("socket-group"))
1824 sockgroup=::arg().asGid("socket-group");
1825 if (!::arg().isEmpty("socket-owner"))
1826 sockowner=::arg().asUid("socket-owner");
1827
1828 if (sockgroup > -1 || sockowner > -1) {
1829 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1830 unixDie("Failed to chown control socket");
1831 }
1832 }
1833
1834 // do mode change if socket-mode is given
1835 if(!::arg().isEmpty("socket-mode")) {
1836 mode_t sockmode=::arg().asMode("socket-mode");
1837 if(chmod(sockname.c_str(), sockmode) < 0) {
1838 unixDie("Failed to chmod control socket");
1839 }
1840 }
1841 }
1842
1843 static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1844 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
1845 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
1846 {
1847 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
1848 const bool lookForECS = ednssubnet != nullptr;
1849 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
1850 size_t questionLen = question.length();
1851 unsigned int consumed=0;
1852 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1853
1854 size_t pos= sizeof(dnsheader)+consumed+4;
1855 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1856 const uint16_t arcount = ntohs(dh->arcount);
1857
1858 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1859 if (question.at(pos) != 0) {
1860 /* not an OPT or a XPF, bye. */
1861 return;
1862 }
1863
1864 pos += 1;
1865 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1866 pos += sizeof(dnsrecordheader);
1867
1868 if (pos >= questionLen) {
1869 return;
1870 }
1871
1872 /* OPT root label (1) followed by type (2) */
1873 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
1874 if (!options) {
1875 char* ecsStart = nullptr;
1876 size_t ecsLen = 0;
1877 /* we need to pass the record len */
1878 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1879 if (res == 0 && ecsLen > 4) {
1880 EDNSSubnetOpts eso;
1881 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1882 *ednssubnet=eso;
1883 foundECS = true;
1884 }
1885 }
1886 }
1887 else {
1888 /* we need to pass the record len */
1889 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
1890 if (res == 0) {
1891 const auto& it = options->find(EDNSOptionCode::ECS);
1892 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
1893 EDNSSubnetOpts eso;
1894 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
1895 *ednssubnet=eso;
1896 foundECS = true;
1897 }
1898 }
1899 }
1900 }
1901 }
1902 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
1903 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1904 return;
1905 }
1906
1907 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1908 }
1909
1910 pos += ntohs(drh->d_clen);
1911 }
1912 }
1913
1914 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1915 {
1916 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1917
1918 if(conn->state==TCPConnection::BYTE0) {
1919 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
1920 if(bytes==1)
1921 conn->state=TCPConnection::BYTE1;
1922 if(bytes==2) {
1923 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1924 conn->data.resize(conn->qlen);
1925 conn->bytesread=0;
1926 conn->state=TCPConnection::GETQUESTION;
1927 }
1928 if(!bytes || bytes < 0) {
1929 t_fdm->removeReadFD(fd);
1930 return;
1931 }
1932 }
1933 else if(conn->state==TCPConnection::BYTE1) {
1934 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
1935 if(bytes==1) {
1936 conn->state=TCPConnection::GETQUESTION;
1937 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1938 conn->data.resize(conn->qlen);
1939 conn->bytesread=0;
1940 }
1941 if(!bytes || bytes < 0) {
1942 if(g_logCommonErrors)
1943 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
1944 t_fdm->removeReadFD(fd);
1945 return;
1946 }
1947 }
1948 else if(conn->state==TCPConnection::GETQUESTION) {
1949 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
1950 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1951 if(g_logCommonErrors) {
1952 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1953 }
1954 t_fdm->removeReadFD(fd);
1955 return;
1956 }
1957 conn->bytesread+=(uint16_t)bytes;
1958 if(conn->bytesread==conn->qlen) {
1959 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1960
1961 std::unique_ptr<DNSComboWriter> dc;
1962 try {
1963 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
1964 }
1965 catch(const MOADNSException &mde) {
1966 g_stats.clientParseError++;
1967 if(g_logCommonErrors)
1968 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
1969 return;
1970 }
1971 dc->d_tcpConnection = conn; // carry the torch
1972 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1973 dc->d_tcp=true;
1974 dc->setRemote(conn->d_remote);
1975 dc->setSource(conn->d_remote);
1976 ComboAddress dest;
1977 dest.reset();
1978 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1979 socklen_t len = dest.getSocklen();
1980 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1981 dc->setLocal(dest);
1982 dc->setDestination(dest);
1983 DNSName qname;
1984 uint16_t qtype=0;
1985 uint16_t qclass=0;
1986 bool needECS = false;
1987 bool needXPF = g_XPFAcl.match(conn->d_remote);
1988 string requestorId;
1989 string deviceId;
1990 bool logQuery = false;
1991 #ifdef HAVE_PROTOBUF
1992 auto luaconfsLocal = g_luaconfs.getLocal();
1993 if (checkProtobufExport(luaconfsLocal)) {
1994 needECS = true;
1995 }
1996 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
1997 #endif /* HAVE_PROTOBUF */
1998
1999 #ifdef HAVE_FSTRM
2000 checkFrameStreamExport(luaconfsLocal);
2001 #endif
2002
2003 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
2004
2005 try {
2006 EDNSOptionViewMap ednsOptions;
2007 bool xpfFound = false;
2008 dc->d_ecsParsed = true;
2009 dc->d_ecsFound = false;
2010 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
2011 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2012 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
2013
2014 if(t_pdl) {
2015 try {
2016 if (t_pdl->d_gettag_ffi) {
2017 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
2018 }
2019 else if (t_pdl->d_gettag) {
2020 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
2021 }
2022 }
2023 catch(const std::exception& e) {
2024 if(g_logCommonErrors)
2025 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2026 }
2027 }
2028 }
2029 catch(const std::exception& e)
2030 {
2031 if(g_logCommonErrors)
2032 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2033 }
2034 }
2035
2036 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2037
2038 #ifdef HAVE_PROTOBUF
2039 if(t_protobufServers || t_outgoingProtobufServers) {
2040 dc->d_requestorId = requestorId;
2041 dc->d_deviceId = deviceId;
2042 dc->d_uuid = getUniqueID();
2043 }
2044
2045 if(t_protobufServers) {
2046 try {
2047
2048 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
2049 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
2050 }
2051 }
2052 catch(std::exception& e) {
2053 if(g_logCommonErrors)
2054 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
2055 }
2056 }
2057 #endif
2058 if(t_pdl) {
2059 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2060 if(!g_quiet)
2061 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2062 g_stats.policyDrops++;
2063 return;
2064 }
2065 }
2066
2067 if(dc->d_mdp.d_header.qr) {
2068 g_stats.ignoredCount++;
2069 if(g_logCommonErrors) {
2070 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2071 }
2072 return;
2073 }
2074 if(dc->d_mdp.d_header.opcode) {
2075 g_stats.ignoredCount++;
2076 if(g_logCommonErrors) {
2077 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2078 }
2079 return;
2080 }
2081 else if (dh->qdcount == 0) {
2082 g_stats.emptyQueriesCount++;
2083 if(g_logCommonErrors) {
2084 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2085 }
2086 return;
2087 }
2088 else {
2089 ++g_stats.qcounter;
2090 ++g_stats.tcpqcounter;
2091 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
2092 return;
2093 }
2094 }
2095 }
2096 }
2097
2098 //! Handle new incoming TCP connection
2099 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
2100 {
2101 ComboAddress addr;
2102 socklen_t addrlen=sizeof(addr);
2103 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
2104 if(newsock>=0) {
2105 if(MT->numProcesses() > g_maxMThreads) {
2106 g_stats.overCapacityDrops++;
2107 try {
2108 closesocket(newsock);
2109 }
2110 catch(const PDNSException& e) {
2111 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
2112 }
2113 return;
2114 }
2115
2116 if(t_remotes)
2117 t_remotes->push_back(addr);
2118 if(t_allowFrom && !t_allowFrom->match(&addr)) {
2119 if(!g_quiet)
2120 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2121
2122 g_stats.unauthorizedTCP++;
2123 try {
2124 closesocket(newsock);
2125 }
2126 catch(const PDNSException& e) {
2127 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
2128 }
2129 return;
2130 }
2131 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
2132 g_stats.tcpClientOverflow++;
2133 try {
2134 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2135 }
2136 catch(const PDNSException& e) {
2137 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
2138 }
2139 return;
2140 }
2141
2142 setNonBlocking(newsock);
2143 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
2144 tc->state=TCPConnection::BYTE0;
2145
2146 struct timeval ttd;
2147 Utility::gettimeofday(&ttd, 0);
2148 ttd.tv_sec += g_tcpTimeout;
2149
2150 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
2151 }
2152 }
2153
2154 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
2155 {
2156 gettimeofday(&g_now, 0);
2157 if (tv.tv_sec) {
2158 struct timeval diff = g_now - tv;
2159 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
2160
2161 if(delta > 1000.0) {
2162 g_stats.tooOldDrops++;
2163 return nullptr;
2164 }
2165 }
2166
2167 ++g_stats.qcounter;
2168 if(fromaddr.sin4.sin_family==AF_INET6)
2169 g_stats.ipv6qcounter++;
2170
2171 string response;
2172 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
2173 unsigned int ctag=0;
2174 uint32_t qhash = 0;
2175 bool needECS = false;
2176 bool needXPF = g_XPFAcl.match(fromaddr);
2177 std::vector<std::string> policyTags;
2178 LuaContext::LuaObject data;
2179 ComboAddress source = fromaddr;
2180 ComboAddress destination = destaddr;
2181 string requestorId;
2182 string deviceId;
2183 bool logQuery = false;
2184 #ifdef HAVE_PROTOBUF
2185 boost::uuids::uuid uniqueId;
2186 auto luaconfsLocal = g_luaconfs.getLocal();
2187 if (checkProtobufExport(luaconfsLocal)) {
2188 uniqueId = getUniqueID();
2189 needECS = true;
2190 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
2191 uniqueId = getUniqueID();
2192 }
2193 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2194 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2195 #endif
2196 #ifdef HAVE_FSTRM
2197 checkFrameStreamExport(luaconfsLocal);
2198 #endif
2199 EDNSSubnetOpts ednssubnet;
2200 bool ecsFound = false;
2201 bool ecsParsed = false;
2202 uint16_t ecsBegin = 0;
2203 uint16_t ecsEnd = 0;
2204 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2205 bool variable = false;
2206 try {
2207 DNSName qname;
2208 uint16_t qtype=0;
2209 uint16_t qclass=0;
2210 uint32_t age;
2211 bool qnameParsed=false;
2212 #ifdef MALLOC_TRACE
2213 /*
2214 static uint64_t last=0;
2215 if(!last)
2216 g_mtracer->clearAllocators();
2217 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2218 last=g_mtracer->getAllocs();
2219 cout<<g_mtracer->topAllocatorsString()<<endl;
2220 g_mtracer->clearAllocators();
2221 */
2222 #endif
2223
2224 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
2225 try {
2226 EDNSOptionViewMap ednsOptions;
2227 bool xpfFound = false;
2228
2229 ecsFound = false;
2230
2231 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2232 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2233 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2234
2235 qnameParsed = true;
2236 ecsParsed = true;
2237
2238 if(t_pdl) {
2239 try {
2240 if (t_pdl->d_gettag_ffi) {
2241 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
2242 }
2243 else if (t_pdl->d_gettag) {
2244 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2245 }
2246 }
2247 catch(const std::exception& e) {
2248 if(g_logCommonErrors)
2249 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2250 }
2251 }
2252 }
2253 catch(const std::exception& e)
2254 {
2255 if(g_logCommonErrors)
2256 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2257 }
2258 }
2259
2260 bool cacheHit = false;
2261 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
2262 #ifdef HAVE_PROTOBUF
2263 if (t_protobufServers) {
2264 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
2265 pbMessage->setServerIdentity(SyncRes::s_serverID);
2266 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
2267 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
2268 }
2269 }
2270 #endif /* HAVE_PROTOBUF */
2271
2272 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2273 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2274 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2275 vState valState;
2276 if (qnameParsed) {
2277 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2278 }
2279 else {
2280 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2281 }
2282
2283 if (cacheHit) {
2284 if(valState == Bogus) {
2285 if(t_bogusremotes)
2286 t_bogusremotes->push_back(source);
2287 if(t_bogusqueryring)
2288 t_bogusqueryring->push_back(make_pair(qname, qtype));
2289 }
2290
2291 #ifdef HAVE_PROTOBUF
2292 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
2293 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2294 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
2295 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2296 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2297 if (g_useKernelTimestamp && tv.tv_sec) {
2298 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2299 }
2300 else {
2301 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2302 }
2303 pbMessage->setRequestorId(requestorId);
2304 pbMessage->setDeviceId(deviceId);
2305 protobufLogResponse(*pbMessage);
2306 }
2307 #endif /* HAVE_PROTOBUF */
2308 if(!g_quiet)
2309 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
2310
2311 g_stats.packetCacheHits++;
2312 SyncRes::s_queries++;
2313 ageDNSPacket(response, age);
2314 struct msghdr msgh;
2315 struct iovec iov;
2316 char cbuf[256];
2317 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2318 msgh.msg_control=NULL;
2319
2320 if(g_fromtosockets.count(fd)) {
2321 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
2322 }
2323 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
2324 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
2325
2326 if(response.length() >= sizeof(struct dnsheader)) {
2327 struct dnsheader tmpdh;
2328 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
2329 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
2330 }
2331 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
2332 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
2333 return 0;
2334 }
2335 }
2336 catch(std::exception& e) {
2337 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
2338 return 0;
2339 }
2340
2341 if(t_pdl) {
2342 if(t_pdl->ipfilter(source, destination, *dh)) {
2343 if(!g_quiet)
2344 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2345 g_stats.policyDrops++;
2346 return 0;
2347 }
2348 }
2349
2350 if(MT->numProcesses() > g_maxMThreads) {
2351 if(!g_quiet)
2352 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
2353
2354 g_stats.overCapacityDrops++;
2355 return 0;
2356 }
2357
2358 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
2359 dc->setSocket(fd);
2360 dc->d_tag=ctag;
2361 dc->d_qhash=qhash;
2362 dc->setRemote(fromaddr);
2363 dc->setSource(source);
2364 dc->setLocal(destaddr);
2365 dc->setDestination(destination);
2366 dc->d_tcp=false;
2367 dc->d_ecsFound = ecsFound;
2368 dc->d_ecsParsed = ecsParsed;
2369 dc->d_ecsBegin = ecsBegin;
2370 dc->d_ecsEnd = ecsEnd;
2371 dc->d_ednssubnet = ednssubnet;
2372 dc->d_ttlCap = ttlCap;
2373 dc->d_variable = variable;
2374 #ifdef HAVE_PROTOBUF
2375 if (t_protobufServers || t_outgoingProtobufServers) {
2376 dc->d_uuid = std::move(uniqueId);
2377 }
2378 dc->d_requestorId = requestorId;
2379 dc->d_deviceId = deviceId;
2380 dc->d_kernelTimestamp = tv;
2381 #endif
2382
2383 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
2384 return 0;
2385 }
2386
2387
2388 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
2389 {
2390 ssize_t len;
2391 static const size_t maxIncomingQuerySize = 512;
2392 static thread_local std::string data;
2393 ComboAddress fromaddr;
2394 struct msghdr msgh;
2395 struct iovec iov;
2396 char cbuf[256];
2397 bool firstQuery = true;
2398
2399 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2400 data.resize(maxIncomingQuerySize);
2401 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2402 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
2403
2404 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
2405
2406 firstQuery = false;
2407
2408 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2409 g_stats.ignoredCount++;
2410 if (!g_quiet) {
2411 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2412 }
2413 return;
2414 }
2415
2416 if (msgh.msg_flags & MSG_TRUNC) {
2417 g_stats.truncatedDrops++;
2418 if (!g_quiet) {
2419 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2420 }
2421 return;
2422 }
2423
2424 if(t_remotes) {
2425 t_remotes->push_back(fromaddr);
2426 }
2427
2428 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2429 if(!g_quiet) {
2430 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2431 }
2432
2433 g_stats.unauthorizedUDP++;
2434 return;
2435 }
2436 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2437 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2438 if(!g_quiet) {
2439 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2440 }
2441
2442 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2443 return;
2444 }
2445
2446 try {
2447 data.resize(static_cast<size_t>(len));
2448 dnsheader* dh=(dnsheader*)&data[0];
2449
2450 if(dh->qr) {
2451 g_stats.ignoredCount++;
2452 if(g_logCommonErrors) {
2453 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2454 }
2455 }
2456 else if(dh->opcode) {
2457 g_stats.ignoredCount++;
2458 if(g_logCommonErrors) {
2459 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2460 }
2461 }
2462 else if (dh->qdcount == 0) {
2463 g_stats.emptyQueriesCount++;
2464 if(g_logCommonErrors) {
2465 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2466 }
2467 }
2468 else {
2469 struct timeval tv={0,0};
2470 HarvestTimestamp(&msgh, &tv);
2471 ComboAddress dest;
2472 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2473 auto loc = rplookup(g_listenSocketsAddresses, fd);
2474 if(HarvestDestinationAddress(&msgh, &dest)) {
2475 // but.. need to get port too
2476 if(loc) {
2477 dest.sin4.sin_port = loc->sin4.sin_port;
2478 }
2479 }
2480 else {
2481 if(loc) {
2482 dest = *loc;
2483 }
2484 else {
2485 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2486 socklen_t slen = dest.getSocklen();
2487 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2488 }
2489 }
2490
2491 if(g_weDistributeQueries) {
2492 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2493 }
2494 else {
2495 ++s_threadInfos[t_id].numberOfDistributedQueries;
2496 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
2497 }
2498 }
2499 }
2500 catch(const MOADNSException &mde) {
2501 g_stats.clientParseError++;
2502 if(g_logCommonErrors) {
2503 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2504 }
2505 }
2506 catch(const std::runtime_error& e) {
2507 g_stats.clientParseError++;
2508 if(g_logCommonErrors) {
2509 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2510 }
2511 }
2512 }
2513 else {
2514 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2515 if(firstQuery && errno == EAGAIN) {
2516 g_stats.noPacketError++;
2517 }
2518
2519 break;
2520 }
2521 }
2522 }
2523
2524 static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
2525 {
2526 int fd;
2527 vector<string>locals;
2528 stringtok(locals,::arg()["local-address"]," ,");
2529
2530 if(locals.empty())
2531 throw PDNSException("No local address specified");
2532
2533 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2534 ServiceTuple st;
2535 st.port=::arg().asNum("local-port");
2536 parseService(*i, st);
2537
2538 ComboAddress sin;
2539
2540 sin.reset();
2541 sin.sin4.sin_family = AF_INET;
2542 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2543 sin.sin6.sin6_family = AF_INET6;
2544 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2545 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
2546 }
2547
2548 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
2549 if(fd<0)
2550 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2551
2552 setCloseOnExec(fd);
2553
2554 int tmp=1;
2555 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
2556 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
2557 exit(1);
2558 }
2559 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
2560 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2561 }
2562
2563 #ifdef TCP_DEFER_ACCEPT
2564 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
2565 if(i==locals.begin())
2566 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
2567 }
2568 #endif
2569
2570 if( ::arg().mustDo("non-local-bind") )
2571 Utility::setBindAny(AF_INET, fd);
2572
2573 #ifdef SO_REUSEPORT
2574 if(g_reusePort) {
2575 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2576 throw PDNSException("SO_REUSEPORT: "+stringerror());
2577 }
2578 #endif
2579
2580 if (::arg().asNum("tcp-fast-open") > 0) {
2581 #ifdef TCP_FASTOPEN
2582 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2583 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
2584 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
2585 }
2586 #else
2587 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
2588 #endif
2589 }
2590
2591 sin.sin4.sin_port = htons(st.port);
2592 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
2593 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
2594 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
2595
2596 setNonBlocking(fd);
2597 setSocketSendBuffer(fd, 65000);
2598 listen(fd, 128);
2599 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
2600 tcpSockets.insert(fd);
2601
2602 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2603 // - fd is not that which we know here, but returned from accept()
2604 if(sin.sin4.sin_family == AF_INET)
2605 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
2606 else
2607 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2608 }
2609 }
2610
2611 static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
2612 {
2613 int one=1;
2614 vector<string>locals;
2615 stringtok(locals,::arg()["local-address"]," ,");
2616
2617 if(locals.empty())
2618 throw PDNSException("No local address specified");
2619
2620 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2621 ServiceTuple st;
2622 st.port=::arg().asNum("local-port");
2623 parseService(*i, st);
2624
2625 ComboAddress sin;
2626
2627 sin.reset();
2628 sin.sin4.sin_family = AF_INET;
2629 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2630 sin.sin6.sin6_family = AF_INET6;
2631 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2632 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2633 }
2634
2635 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2636 if(fd < 0) {
2637 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2638 }
2639 if (!setSocketTimestamps(fd))
2640 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2641
2642 if(IsAnyAddress(sin)) {
2643 if(sin.sin4.sin_family == AF_INET)
2644 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2645 g_fromtosockets.insert(fd);
2646 #ifdef IPV6_RECVPKTINFO
2647 if(sin.sin4.sin_family == AF_INET6)
2648 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2649 g_fromtosockets.insert(fd);
2650 #endif
2651 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2652 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2653 }
2654 }
2655 if( ::arg().mustDo("non-local-bind") )
2656 Utility::setBindAny(AF_INET6, fd);
2657
2658 setCloseOnExec(fd);
2659
2660 setSocketReceiveBuffer(fd, 250000);
2661 sin.sin4.sin_port = htons(st.port);
2662
2663
2664 #ifdef SO_REUSEPORT
2665 if(g_reusePort) {
2666 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2667 throw PDNSException("SO_REUSEPORT: "+stringerror());
2668 }
2669 #endif
2670
2671 if (sin.isIPv4()) {
2672 try {
2673 setSocketIgnorePMTU(fd);
2674 }
2675 catch(const std::exception& e) {
2676 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2677 }
2678 }
2679
2680 socklen_t socklen=sin.getSocklen();
2681 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2682 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2683
2684 setNonBlocking(fd);
2685
2686 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
2687 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2688 if(sin.sin4.sin_family == AF_INET)
2689 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2690 else
2691 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2692 }
2693 }
2694
2695 static void daemonize(void)
2696 {
2697 if(fork())
2698 exit(0); // bye bye
2699
2700 setsid();
2701
2702 int i=open("/dev/null",O_RDWR); /* open stdin */
2703 if(i < 0)
2704 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2705 else {
2706 dup2(i,0); /* stdin */
2707 dup2(i,1); /* stderr */
2708 dup2(i,2); /* stderr */
2709 close(i);
2710 }
2711 }
2712
2713 static void usr1Handler(int)
2714 {
2715 statsWanted=true;
2716 }
2717
2718 static void usr2Handler(int)
2719 {
2720 g_quiet= !g_quiet;
2721 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2722 ::arg().set("quiet")=g_quiet ? "" : "no";
2723 }
2724
2725 static void doStats(void)
2726 {
2727 static time_t lastOutputTime;
2728 static uint64_t lastQueryCount;
2729
2730 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2731 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2732
2733 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2734 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2735 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2736 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2737 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2738
2739 g_log<<Logger::Notice<<"stats: throttle map: "
2740 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2741 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2742 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2743 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2744 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2745 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2746 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2747
2748 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2749 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2750
2751 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2752 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2753
2754 size_t idx = 0;
2755 for (const auto& threadInfo : s_threadInfos) {
2756 if(threadInfo.isWorker) {
2757 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
2758 ++idx;
2759 }
2760 }
2761
2762 time_t now = time(0);
2763 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2764 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2765 }
2766 lastOutputTime = now;
2767 lastQueryCount = SyncRes::s_queries;
2768 }
2769 else if(statsWanted)
2770 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
2771
2772 statsWanted=false;
2773 }
2774
2775 static void houseKeeping(void *)
2776 {
2777 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
2778 static thread_local int cleanCounter=0;
2779 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2780 auto luaconfsLocal = g_luaconfs.getLocal();
2781
2782 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2783 // Loading the Lua config file already "refreshed" the TAs
2784 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2785 }
2786
2787 try {
2788 if(s_running) {
2789 return;
2790 }
2791 s_running=true;
2792
2793 struct timeval now;
2794 Utility::gettimeofday(&now, 0);
2795
2796 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2797 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2798 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2799
2800 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2801
2802 if(!((cleanCounter++)%40)) { // this is a full scan!
2803 time_t limit=now.tv_sec-300;
2804 SyncRes::pruneNSSpeeds(limit);
2805 }
2806 last_prune=time(0);
2807 }
2808
2809 if(now.tv_sec - last_rootupdate > 7200) {
2810 int res = SyncRes::getRootNS(g_now, nullptr);
2811 if (!res)
2812 last_rootupdate=now.tv_sec;
2813 }
2814
2815 if(isHandlerThread()) {
2816
2817 if(now.tv_sec - last_secpoll >= 3600) {
2818 try {
2819 doSecPoll(&last_secpoll);
2820 }
2821 catch(std::exception& e)
2822 {
2823 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2824 }
2825 catch(PDNSException& e)
2826 {
2827 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2828 }
2829 catch(ImmediateServFailException &e)
2830 {
2831 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2832 }
2833 catch(...)
2834 {
2835 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
2836 }
2837 }
2838
2839 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2840 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2841 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2842 try {
2843 map<DNSName, dsmap_t> dsAnchors;
2844 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2845 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2846 lci.dsAnchors = dsAnchors;
2847 });
2848 }
2849 last_trustAnchorUpdate = now.tv_sec;
2850 } catch (const PDNSException &pe) {
2851 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2852 }
2853 }
2854 }
2855 s_running=false;
2856 }
2857 catch(PDNSException& ae)
2858 {
2859 s_running=false;
2860 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2861 throw;
2862 }
2863 }
2864
2865 static void makeThreadPipes()
2866 {
2867 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
2868 if (pipeBufferSize > 0) {
2869 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
2870 }
2871
2872 /* thread 0 is the handler / SNMP, we start at 1 */
2873 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2874 auto& threadInfos = s_threadInfos.at(n);
2875
2876 int fd[2];
2877 if(pipe(fd) < 0)
2878 unixDie("Creating pipe for inter-thread communications");
2879
2880 threadInfos.pipes.readToThread = fd[0];
2881 threadInfos.pipes.writeToThread = fd[1];
2882
2883 if(pipe(fd) < 0)
2884 unixDie("Creating pipe for inter-thread communications");
2885
2886 threadInfos.pipes.readFromThread = fd[0];
2887 threadInfos.pipes.writeFromThread = fd[1];
2888
2889 if(pipe(fd) < 0)
2890 unixDie("Creating pipe for inter-thread communications");
2891
2892 threadInfos.pipes.readQueriesToThread = fd[0];
2893 threadInfos.pipes.writeQueriesToThread = fd[1];
2894
2895 if (pipeBufferSize > 0) {
2896 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
2897 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(errno)<<endl;
2898 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
2899 if (existingSize > 0) {
2900 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
2901 }
2902 }
2903 }
2904
2905 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
2906 unixDie("Making pipe for inter-thread communications non-blocking");
2907 }
2908 }
2909 }
2910
2911 struct ThreadMSG
2912 {
2913 pipefunc_t func;
2914 bool wantAnswer;
2915 };
2916
2917 void broadcastFunction(const pipefunc_t& func)
2918 {
2919 /* This function might be called by the worker with t_id 0 during startup
2920 for the initialization of ACLs and domain maps. After that it should only
2921 be called by the handler. */
2922
2923 if (s_threadInfos.empty() && isHandlerThread()) {
2924 /* the handler and distributors will call themselves below, but
2925 during startup we get called while s_threadInfos has not been
2926 populated yet to update the ACL or domain maps, so we need to
2927 handle that case.
2928 */
2929 func();
2930 }
2931
2932 unsigned int n = 0;
2933 for (const auto& threadInfo : s_threadInfos) {
2934 if(n++ == t_id) {
2935 func(); // don't write to ourselves!
2936 continue;
2937 }
2938
2939 ThreadMSG* tmsg = new ThreadMSG();
2940 tmsg->func = func;
2941 tmsg->wantAnswer = true;
2942 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2943 delete tmsg;
2944
2945 unixDie("write to thread pipe returned wrong size or error");
2946 }
2947
2948 string* resp = nullptr;
2949 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2950 unixDie("read from thread pipe returned wrong size or error");
2951
2952 if(resp) {
2953 delete resp;
2954 resp = nullptr;
2955 }
2956 }
2957 }
2958
2959 static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
2960 {
2961 auto& targetInfo = s_threadInfos[target];
2962 if(!targetInfo.isWorker) {
2963 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
2964 exit(1);
2965 }
2966
2967 const auto& tps = targetInfo.pipes;
2968
2969 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2970 if (written > 0) {
2971 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2972 delete tmsg;
2973 unixDie("write to thread pipe returned wrong size or error");
2974 }
2975 }
2976 else {
2977 int error = errno;
2978 if (error == EAGAIN || error == EWOULDBLOCK) {
2979 return false;
2980 } else {
2981 delete tmsg;
2982 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
2983 }
2984 }
2985
2986 ++targetInfo.numberOfDistributedQueries;
2987
2988 return true;
2989 }
2990
2991 static unsigned int getWorkerLoad(size_t workerIdx)
2992 {
2993 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
2994 if (mt != nullptr) {
2995 return mt->numProcesses();
2996 }
2997 return 0;
2998 }
2999
3000 static unsigned int selectWorker(unsigned int hash)
3001 {
3002 if (s_balancingFactor == 0) {
3003 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
3004 }
3005
3006 /* we start with one, representing the query we are currently handling */
3007 double currentLoad = 1;
3008 std::vector<unsigned int> load(g_numWorkerThreads);
3009 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3010 load[idx] = getWorkerLoad(idx);
3011 currentLoad += load[idx];
3012 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3013 }
3014
3015 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3016 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3017
3018 unsigned int worker = hash % g_numWorkerThreads;
3019 /* at least one server has to be at or below the average load */
3020 if (load[worker] > targetLoad) {
3021 ++g_stats.rebalancedQueries;
3022 do {
3023 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3024 worker = (worker + 1) % g_numWorkerThreads;
3025 }
3026 while(load[worker] > targetLoad);
3027 }
3028
3029 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3030 }
3031
3032 // This function is only called by the distributor threads, when pdns-distributes-queries is set
3033 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3034 {
3035 if (!isDistributorThread()) {
3036 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3037 exit(1);
3038 }
3039
3040 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
3041 unsigned int target = selectWorker(hash);
3042
3043 ThreadMSG* tmsg = new ThreadMSG();
3044 tmsg->func = func;
3045 tmsg->wantAnswer = false;
3046
3047 if (!trySendingQueryToWorker(target, tmsg)) {
3048 /* if this function failed but did not raise an exception, it means that the pipe
3049 was full, let's try another one */
3050 unsigned int newTarget = 0;
3051 do {
3052 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3053 } while (newTarget == target);
3054
3055 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3056 g_stats.queryPipeFullDrops++;
3057 delete tmsg;
3058 }
3059 }
3060 }
3061
3062 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
3063 {
3064 ThreadMSG* tmsg = nullptr;
3065
3066 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
3067 unixDie("read from thread pipe returned wrong size or error");
3068 }
3069
3070 void *resp=0;
3071 try {
3072 resp = tmsg->func();
3073 }
3074 catch(std::exception& e) {
3075 if(g_logCommonErrors)
3076 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
3077 }
3078 catch(PDNSException& e) {
3079 if(g_logCommonErrors)
3080 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
3081 }
3082 if(tmsg->wantAnswer) {
3083 const auto& threadInfo = s_threadInfos.at(t_id);
3084 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
3085 delete tmsg;
3086 unixDie("write to thread pipe returned wrong size or error");
3087 }
3088 }
3089
3090 delete tmsg;
3091 }
3092
3093 template<class T> void *voider(const boost::function<T*()>& func)
3094 {
3095 return func();
3096 }
3097
3098 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3099 {
3100 a.insert(a.end(), b.begin(), b.end());
3101 return a;
3102 }
3103
/* Append all (string, number) pairs of b to a and return a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
3109
3110 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3111 {
3112 a.insert(a.end(), b.begin(), b.end());
3113 return a;
3114 }
3115
3116
3117 /*
3118 This function should only be called by the handler to gather metrics, wipe the cache,
3119 reload the Lua script (not the Lua config) or change the current trace regex,
3120 and by the SNMP thread to gather metrics. */
3121 template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3122 {
3123 if (!isHandlerThread()) {
3124 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
3125 exit(1);
3126 }
3127
3128 unsigned int n = 0;
3129 T ret=T();
3130 for (const auto& threadInfo : s_threadInfos) {
3131 if (n++ == t_id) {
3132 continue;
3133 }
3134
3135 const auto& tps = threadInfo.pipes;
3136 ThreadMSG* tmsg = new ThreadMSG();
3137 tmsg->func = boost::bind(voider<T>, func);
3138 tmsg->wantAnswer = true;
3139
3140 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3141 delete tmsg;
3142 unixDie("write to thread pipe returned wrong size or error");
3143 }
3144
3145 T* resp = nullptr;
3146 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3147 unixDie("read from thread pipe returned wrong size or error");
3148
3149 if(resp) {
3150 ret += *resp;
3151 delete resp;
3152 resp = nullptr;
3153 }
3154 }
3155 return ret;
3156 }
3157
3158 template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3159 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3160 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3161 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3162 template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3163
3164 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
3165 {
3166 try {
3167 string remote;
3168 string msg=s_rcc.recv(&remote);
3169 RecursorControlParser rcp;
3170 RecursorControlParser::func_t* command;
3171
3172 string answer=rcp.getAnswer(msg, &command);
3173
3174 // If we are inside a chroot, we need to strip
3175 if (!arg()["chroot"].empty()) {
3176 size_t len = arg()["chroot"].length();
3177 remote = remote.substr(len);
3178 }
3179
3180 s_rcc.send(answer, &remote);
3181 command();
3182 }
3183 catch(const std::exception& e) {
3184 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
3185 }
3186 catch(const PDNSException& ae) {
3187 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
3188 }
3189 }
3190
3191 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
3192 {
3193 PacketID* pident=any_cast<PacketID>(&var);
3194 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3195
3196 shared_array<char> buffer(new char[pident->inNeeded]);
3197
3198 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
3199 if(ret > 0) {
3200 pident->inMSG.append(&buffer[0], &buffer[ret]);
3201 pident->inNeeded-=(size_t)ret;
3202 if(!pident->inNeeded || pident->inIncompleteOkay) {
3203 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3204 PacketID pid=*pident;
3205 string msg=pident->inMSG;
3206
3207 t_fdm->removeReadFD(fd);
3208 MT->sendEvent(pid, &msg);
3209 }
3210 else {
3211 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3212 }
3213 }
3214 else {
3215 PacketID tmp=*pident;
3216 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
3217 string empty;
3218 MT->sendEvent(tmp, &empty); // this conveys error status
3219 }
3220 }
3221
3222 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
3223 {
3224 PacketID* pid=any_cast<PacketID>(&var);
3225 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
3226 if(ret > 0) {
3227 pid->outPos+=(ssize_t)ret;
3228 if(pid->outPos==pid->outMSG.size()) {
3229 PacketID tmp=*pid;
3230 t_fdm->removeWriteFD(fd);
3231 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3232 }
3233 }
3234 else { // error or EOF
3235 PacketID tmp(*pid);
3236 t_fdm->removeWriteFD(fd);
3237 string sent;
3238 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
3239 }
3240 }
3241
3242 // resend event to everybody chained onto it
3243 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
3244 {
3245 if(iter->key.chain.empty())
3246 return;
3247 // cerr<<"doResends called!\n";
3248 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3249 resend.fd=-1;
3250 resend.id=*i;
3251 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3252
3253 MT->sendEvent(resend, &content);
3254 g_stats.chainResends++;
3255 }
3256 }
3257
3258 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
3259 {
3260 PacketID pid=any_cast<PacketID>(var);
3261 ssize_t len;
3262 std::string packet;
3263 packet.resize(g_outgoingEDNSBufsize);
3264 ComboAddress fromaddr;
3265 socklen_t addrlen=sizeof(fromaddr);
3266
3267 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
3268
3269 if(len < (ssize_t) sizeof(dnsheader)) {
3270 if(len < 0)
3271 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
3272 else {
3273 g_stats.serverParseError++;
3274 if(g_logCommonErrors)
3275 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
3276 ": packet smaller than DNS header"<<endl;
3277 }
3278
3279 t_udpclientsocks->returnSocket(fd);
3280 string empty;
3281
3282 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3283 if(iter != MT->d_waiters.end())
3284 doResends(iter, pid, empty);
3285
3286 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
3287 return;
3288 }
3289
3290 packet.resize(len);
3291 dnsheader dh;
3292 memcpy(&dh, &packet.at(0), sizeof(dh));
3293
3294 PacketID pident;
3295 pident.remote=fromaddr;
3296 pident.id=dh.id;
3297 pident.fd=fd;
3298
3299 if(!dh.qr && g_logCommonErrors) {
3300 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
3301 }
3302
3303 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3304 !dh.qr) { // one weird server
3305 pident.domain.clear();
3306 pident.type = 0;
3307 }
3308 else {
3309 try {
3310 if(len > 12)
3311 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
3312 }
3313 catch(std::exception& e) {
3314 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
3315 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
3316 return;
3317 }
3318 }
3319
3320 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3321 if(iter != MT->d_waiters.end()) {
3322 doResends(iter, pident, packet);
3323 }
3324
3325 retryWithName:
3326
3327 if(!MT->sendEvent(pident, &packet)) {
3328 /* we did not find a match for this response, something is wrong */
3329
3330 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3331 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3332 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
3333 pident.domain == mthread->key.domain) {
3334 mthread->key.nearMisses++;
3335 }
3336
3337 // be a bit paranoid here since we're weakening our matching
3338 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
3339 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3340 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3341 pident.domain = mthread->key.domain;
3342 pident.type = mthread->key.type;
3343 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
3344 }
3345 }
3346 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3347 if(g_logCommonErrors) {
3348 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
3349 }
3350 }
3351 else if(fd >= 0) {
3352 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
3353 t_udpclientsocks->returnSocket(fd);
3354 }
3355 }
3356
3357 FDMultiplexer* getMultiplexer()
3358 {
3359 FDMultiplexer* ret;
3360 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
3361 try {
3362 ret=i.second();
3363 return ret;
3364 }
3365 catch(FDMultiplexerException &fe) {
3366 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
3367 }
3368 catch(...) {
3369 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
3370 }
3371 }
3372 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
3373 exit(1);
3374 }
3375
3376
3377 static string* doReloadLuaScript()
3378 {
3379 string fname= ::arg()["lua-dns-script"];
3380 try {
3381 if(fname.empty()) {
3382 t_pdl.reset();
3383 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
3384 return new string("unloaded\n");
3385 }
3386 else {
3387 t_pdl = std::make_shared<RecursorLua4>();
3388 t_pdl->loadFile(fname);
3389 }
3390 }
3391 catch(std::exception& e) {
3392 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
3393 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
3394 }
3395
3396 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
3397 return new string("(re)loaded '"+fname+"'\n");
3398 }
3399
3400 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3401 {
3402 if(begin != end)
3403 ::arg().set("lua-dns-script") = *begin;
3404
3405 return broadcastAccFunction<string>(doReloadLuaScript);
3406 }
3407
3408 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
3409 try
3410 {
3411 if(newRegex.empty()) {
3412 t_traceRegex.reset();
3413 return new string("unset\n");
3414 }
3415 else {
3416 t_traceRegex = std::make_shared<Regex>(newRegex);
3417 return new string("ok\n");
3418 }
3419 }
3420 catch(PDNSException& ae)
3421 {
3422 return new string(ae.reason+"\n");
3423 }
3424
3425 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3426 {
3427 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3428 }
3429
3430 static void checkLinuxIPv6Limits()
3431 {
3432 #ifdef __linux__
3433 string line;
3434 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
3435 int lim=std::stoi(line);
3436 if(lim < 16384) {
3437 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
3438 }
3439 }
3440 #endif
3441 }
3442 static void checkOrFixFDS()
3443 {
3444 unsigned int availFDs=getFilenumLimit();
3445 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3446
3447 if(wantFDs > availFDs) {
3448 unsigned int hardlimit= getFilenumLimit(true);
3449 if(hardlimit >= wantFDs) {
3450 setFilenumLimit(wantFDs);
3451 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
3452 }
3453 else {
3454 int newval = (hardlimit - 25) / g_numWorkerThreads;
3455 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
3456 g_maxMThreads = newval;
3457 setFilenumLimit(hardlimit);
3458 }
3459 }
3460 }
3461
3462 static void* recursorThread(unsigned int tid, const string& threadName);
3463
3464 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
3465 {
3466 t_allowFrom = ng;
3467 return nullptr;
3468 }
3469
3470 int g_argc;
3471 char** g_argv;
3472
3473 void parseACLs()
3474 {
3475 static bool l_initialized;
3476
3477 if(l_initialized) { // only reload configuration file on second call
3478 string configname=::arg()["config-dir"]+"/recursor.conf";
3479 if(::arg()["config-name"]!="") {
3480 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3481 }
3482 cleanSlashes(configname);
3483
3484 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
3485 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
3486 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
3487 ::arg().preParseFile(configname.c_str(), "include-dir");
3488 ::arg().preParse(g_argc, g_argv, "include-dir");
3489
3490 // then process includes
3491 std::vector<std::string> extraConfigs;
3492 ::arg().gatherIncludes(extraConfigs);
3493
3494 for(const std::string& fn : extraConfigs) {
3495 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3496 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3497 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3498 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3499 }
3500
3501 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3502 ::arg().preParse(g_argc, g_argv, "allow-from");
3503 }
3504
3505 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3506 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3507
3508 if(!::arg()["allow-from-file"].empty()) {
3509 string line;
3510 ifstream ifs(::arg()["allow-from-file"].c_str());
3511 if(!ifs) {
3512 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3513 }
3514
3515 string::size_type pos;
3516 while(getline(ifs,line)) {
3517 pos=line.find('#');
3518 if(pos!=string::npos)
3519 line.resize(pos);
3520 trim(line);
3521 if(line.empty())
3522 continue;
3523
3524 allowFrom->addMask(line);
3525 }
3526 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
3527 }
3528 else if(!::arg()["allow-from"].empty()) {
3529 vector<string> ips;
3530 stringtok(ips, ::arg()["allow-from"], ", ");
3531
3532 g_log<<Logger::Warning<<"Only allowing queries from: ";
3533 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3534 allowFrom->addMask(*i);
3535 if(i!=ips.begin())
3536 g_log<<Logger::Warning<<", ";
3537 g_log<<Logger::Warning<<*i;
3538 }
3539 g_log<<Logger::Warning<<endl;
3540 }
3541 else {
3542 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3543 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
3544 allowFrom = nullptr;
3545 }
3546
3547 g_initialAllowFrom = allowFrom;
3548 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
3549 oldAllowFrom = nullptr;
3550
3551 l_initialized = true;
3552 }
3553
3554
3555 static void setupDelegationOnly()
3556 {
3557 vector<string> parts;
3558 stringtok(parts, ::arg()["delegation-only"], ", \t");
3559 for(const auto& p : parts) {
3560 SyncRes::addDelegationOnly(DNSName(p));
3561 }
3562 }
3563
3564 static std::map<unsigned int, std::set<int> > parseCPUMap()
3565 {
3566 std::map<unsigned int, std::set<int> > result;
3567
3568 const std::string value = ::arg()["cpu-map"];
3569
3570 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
3571 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
3572 return result;
3573 }
3574
3575 std::vector<std::string> parts;
3576
3577 stringtok(parts, value, " \t");
3578
3579 for(const auto& part : parts) {
3580 if (part.find('=') == string::npos)
3581 continue;
3582
3583 try {
3584 auto headers = splitField(part, '=');
3585 trim(headers.first);
3586 trim(headers.second);
3587
3588 unsigned int threadId = pdns_stou(headers.first);
3589 std::vector<std::string> cpus;
3590
3591 stringtok(cpus, headers.second, ",");
3592
3593 for(const auto& cpu : cpus) {
3594 int cpuId = std::stoi(cpu);
3595
3596 result[threadId].insert(cpuId);
3597 }
3598 }
3599 catch(const std::exception& e) {
3600 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
3601 }
3602 }
3603
3604 return result;
3605 }
3606
3607 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3608 {
3609 const auto& cpuMapping = cpusMap.find(n);
3610 if (cpuMapping != cpusMap.cend()) {
3611 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3612 if (rc == 0) {
3613 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
3614 for (const auto cpu : cpuMapping->second) {
3615 g_log<<Logger::Info<<" "<<cpu;
3616 }
3617 g_log<<Logger::Info<<endl;
3618 }
3619 else {
3620 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
3621 for (const auto cpu : cpuMapping->second) {
3622 g_log<<Logger::Info<<" "<<cpu;
3623 }
3624 g_log<<Logger::Info<<strerror(rc)<<endl;
3625 }
3626 }
3627 }
3628
3629 #ifdef NOD_ENABLED
3630 static void setupNODThread()
3631 {
3632 if (g_nodEnabled) {
3633 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3634 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
3635 try {
3636 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3637 }
3638 catch (const PDNSException& e) {
3639 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3640 _exit(1);
3641 }
3642 if (!t_nodDBp->init()) {
3643 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3644 _exit(1);
3645 }
3646 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
3647 t.detach();
3648 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
3649 }
3650 if (g_udrEnabled) {
3651 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3652 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
3653 try {
3654 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3655 }
3656 catch (const PDNSException& e) {
3657 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3658 _exit(1);
3659 }
3660 if (!t_udrDBp->init()) {
3661 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3662 _exit(1);
3663 }
3664 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
3665 t.detach();
3666 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
3667 }
3668 }
3669
3670 void parseNODWhitelist(const std::string& wlist)
3671 {
3672 vector<string> parts;
3673 stringtok(parts, wlist, ",; ");
3674 for(const auto& a : parts) {
3675 g_nodDomainWL.add(DNSName(a));
3676 }
3677 }
3678
3679 static void setupNODGlobal()
3680 {
3681 // Setup NOD subsystem
3682 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3683 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3684 g_nodLog = ::arg().mustDo("new-domain-log");
3685 parseNODWhitelist(::arg()["new-domain-whitelist"]);
3686
3687 // Setup Unique DNS Response subsystem
3688 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3689 g_udrLog = ::arg().mustDo("unique-response-log");
3690 }
3691 #endif /* NOD_ENABLED */
3692
3693 static int serviceMain(int argc, char*argv[])
3694 {
3695 g_log.setName(s_programname);
3696 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3697 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
3698
3699 if(!::arg()["logging-facility"].empty()) {
3700 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3701 if(val >= 0)
3702 g_log.setFacility(val);
3703 else
3704 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
3705 }
3706
3707 showProductVersion();
3708
3709 g_disthashseed=dns_random(0xffffffff);
3710
3711 checkLinuxIPv6Limits();
3712 try {
3713 vector<string> addrs;
3714 if(!::arg()["query-local-address6"].empty()) {
3715 SyncRes::s_doIPv6=true;
3716 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3717
3718 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3719 for(const string& addr : addrs) {
3720 g_localQueryAddresses6.push_back(ComboAddress(addr));
3721 }
3722 }
3723 else {
3724 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
3725 }
3726 addrs.clear();
3727 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3728 for(const string& addr : addrs) {
3729 g_localQueryAddresses4.push_back(ComboAddress(addr));
3730 }
3731 }
3732 catch(std::exception& e) {
3733 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
3734 exit(99);
3735 }
3736
3737 // keep this ABOVE loadRecursorLuaConfig!
3738 if(::arg()["dnssec"]=="off")
3739 g_dnssecmode=DNSSECMode::Off;
3740 else if(::arg()["dnssec"]=="process-no-validate")
3741 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3742 else if(::arg()["dnssec"]=="process")
3743 g_dnssecmode=DNSSECMode::Process;
3744 else if(::arg()["dnssec"]=="validate")
3745 g_dnssecmode=DNSSECMode::ValidateAll;
3746 else if(::arg()["dnssec"]=="log-fail")
3747 g_dnssecmode=DNSSECMode::ValidateForLog;
3748 else {
3749 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
3750 exit(1);
3751 }
3752
3753 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3754 if (g_signatureInceptionSkew < 0) {
3755 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3756 exit(1);
3757 }
3758
3759 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
3760 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
3761
3762 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3763 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
3764
3765 luaConfigDelayedThreads delayedLuaThreads;
3766 try {
3767 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
3768 }
3769 catch (PDNSException &e) {
3770 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
3771 exit(1);
3772 }
3773
3774 parseACLs();
3775 initPublicSuffixList(::arg()["public-suffix-list-file"]);
3776
3777 if(!::arg()["dont-query"].empty()) {
3778 vector<string> ips;
3779 stringtok(ips, ::arg()["dont-query"], ", ");
3780 ips.push_back("0.0.0.0");
3781 ips.push_back("::");
3782
3783 g_log<<Logger::Warning<<"Will not send queries to: ";
3784 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3785 SyncRes::addDontQuery(*i);
3786 if(i!=ips.begin())
3787 g_log<<Logger::Warning<<", ";
3788 g_log<<Logger::Warning<<*i;
3789 }
3790 g_log<<Logger::Warning<<endl;
3791 }
3792
3793 g_quiet=::arg().mustDo("quiet");
3794
3795 /* this needs to be done before parseACLs(), which call broadcastFunction() */
3796 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3797 if(g_weDistributeQueries) {
3798 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
3799 }
3800
3801 setupDelegationOnly();
3802 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
3803
3804 if(::arg()["trace"]=="fail") {
3805 SyncRes::setDefaultLogMode(SyncRes::Store);
3806 }
3807 else if(::arg().mustDo("trace")) {
3808 SyncRes::setDefaultLogMode(SyncRes::Log);
3809 ::arg().set("quiet")="no";
3810 g_quiet=false;
3811 g_dnssecLOG=true;
3812 }
3813 string myHostname = getHostname();
3814 if (myHostname == "UNKNOWN"){
3815 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3816 myHostname = "";
3817 }
3818
3819 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3820 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
3821
3822 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3823
3824 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
3825 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
3826 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
3827 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
3828 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3829 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3830 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
3831 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3832 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
3833 SyncRes::s_serverID=::arg()["server-id"];
3834 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3835 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3836 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3837 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3838 if(SyncRes::s_serverID.empty()) {
3839 SyncRes::s_serverID = myHostname;
3840 }
3841
3842 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3843 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3844 SyncRes::clearECSStats();
3845 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3846 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
3847 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
3848
3849 SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
3850
3851 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3852 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3853 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3854 }
3855 else {
3856 bool found = false;
3857 for (const auto& addr : g_localQueryAddresses4) {
3858 if (!IsAnyAddress(addr)) {
3859 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3860 found = true;
3861 break;
3862 }
3863 }
3864 if (!found) {
3865 for (const auto& addr : g_localQueryAddresses6) {
3866 if (!IsAnyAddress(addr)) {
3867 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3868 found = true;
3869 break;
3870 }
3871 }
3872 if (!found) {
3873 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3874 }
3875 }
3876 }
3877
3878 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3879 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3880 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3881
3882 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
3883 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
3884
3885 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3886
3887 g_initialDomainMap = parseAuthAndForwards();
3888
3889 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3890
3891 g_logCommonErrors=::arg().mustDo("log-common-errors");
3892 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3893
3894 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3895 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3896
3897 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3898
3899 g_numDistributorThreads = ::arg().asNum("distributor-threads");
3900 g_numWorkerThreads = ::arg().asNum("threads");
3901 if (g_numWorkerThreads < 1) {
3902 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3903 g_numWorkerThreads = 1;
3904 }
3905
3906 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
3907 g_maxMThreads = ::arg().asNum("max-mthreads");
3908
3909 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3910
3911 g_statisticsInterval = ::arg().asNum("statistics-interval");
3912
3913 {
3914 SuffixMatchNode dontThrottleNames;
3915 vector<string> parts;
3916 stringtok(parts, ::arg()["dont-throttle-names"]);
3917 for (const auto &p : parts) {
3918 dontThrottleNames.add(DNSName(p));
3919 }
3920 g_dontThrottleNames.setState(dontThrottleNames);
3921
3922 NetmaskGroup dontThrottleNetmasks;
3923 stringtok(parts, ::arg()["dont-throttle-netmasks"]);
3924 for (const auto &p : parts) {
3925 dontThrottleNetmasks.addMask(Netmask(p));
3926 }
3927 g_dontThrottleNetmasks.setState(dontThrottleNetmasks);
3928 }
3929
3930 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
3931 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
3932 s_balancingFactor = 0.0;
3933 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
3934 }
3935
3936 #ifdef SO_REUSEPORT
3937 g_reusePort = ::arg().mustDo("reuseport");
3938 #endif
3939
3940 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
3941
3942 if (g_reusePort) {
3943 if (g_weDistributeQueries) {
3944 /* first thread is the handler, then distributors */
3945 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3946 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3947 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3948 makeUDPServerSockets(deferredAdds);
3949 makeTCPServerSockets(deferredAdds, tcpSockets);
3950 }
3951 }
3952 else {
3953 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3954 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3955 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
3956 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
3957 makeUDPServerSockets(deferredAdds);
3958 makeTCPServerSockets(deferredAdds, tcpSockets);
3959 }
3960 }
3961 }
3962 else {
3963 std::set<int> tcpSockets;
3964 /* we don't have reuseport so we can only open one socket per
3965 listening addr:port and everyone will listen on it */
3966 makeUDPServerSockets(g_deferredAdds);
3967 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3968
3969 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3970 needs to listen to the shared sockets */
3971 if (g_weDistributeQueries) {
3972 /* first thread is the handler, then distributors */
3973 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3974 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3975 }
3976 }
3977 else {
3978 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3979 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3980 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3981 }
3982 }
3983 }
3984
3985 #ifdef NOD_ENABLED
3986 // Setup newly observed domain globals
3987 setupNODGlobal();
3988 #endif /* NOD_ENABLED */
3989
3990 int forks;
3991 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3992 if(!fork()) // we are child
3993 break;
3994 }
3995
3996 if(::arg().mustDo("daemon")) {
3997 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3998 g_log.toConsole(Logger::Critical);
3999 daemonize();
4000 }
4001 signal(SIGUSR1,usr1Handler);
4002 signal(SIGUSR2,usr2Handler);
4003 signal(SIGPIPE,SIG_IGN);
4004
4005 checkOrFixFDS();
4006
4007 #ifdef HAVE_LIBSODIUM
4008 if (sodium_init() == -1) {
4009 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
4010 exit(99);
4011 }
4012 #endif
4013
4014 openssl_thread_setup();
4015 openssl_seed();
4016 /* setup rng before chroot */
4017 dns_random_init();
4018
4019 if(::arg()["server-id"].empty()) {
4020 ::arg().set("server-id") = myHostname;
4021 }
4022
4023 int newgid=0;
4024 if(!::arg()["setgid"].empty())
4025 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
4026 int newuid=0;
4027 if(!::arg()["setuid"].empty())
4028 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
4029
4030 Utility::dropGroupPrivs(newuid, newgid);
4031
4032 if (!::arg()["chroot"].empty()) {
4033 #ifdef HAVE_SYSTEMD
4034 char *ns;
4035 ns = getenv("NOTIFY_SOCKET");
4036 if (ns != nullptr) {
4037 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
4038 exit(1);
4039 }
4040 #endif
4041 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
4042 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
4043 exit(1);
4044 }
4045 else
4046 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
4047 }
4048
4049 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4050 if(!s_pidfname.empty())
4051 unlink(s_pidfname.c_str()); // remove possible old pid file
4052 writePid();
4053
4054 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4055
4056 Utility::dropUserPrivs(newuid);
4057 try {
4058 /* we might still have capabilities remaining, for example if we have been started as root
4059 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4060 like CAP_NET_BIND_SERVICE.
4061 */
4062 dropCapabilities();
4063 }
4064 catch(const std::exception& e) {
4065 g_log<<Logger::Warning<<e.what()<<endl;
4066 }
4067
4068 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4069
4070 makeThreadPipes();
4071
4072 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4073 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
4074 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
4075 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
4076
4077 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4078
4079 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4080 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4081 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4082 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
4083
4084 if (::arg().mustDo("snmp-agent")) {
4085 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4086 g_snmpAgent->run();
4087 }
4088
4089 int port = ::arg().asNum("udp-source-port-min");
4090 if(port < 1024 || port > 65535){
4091 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
4092 exit(99); // this isn't going to fix itself either
4093 }
4094 s_minUdpSourcePort = port;
4095 port = ::arg().asNum("udp-source-port-max");
4096 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
4097 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
4098 exit(99); // this isn't going to fix itself either
4099 }
4100 s_maxUdpSourcePort = port;
4101 std::vector<string> parts {};
4102 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
4103 for (const auto &part : parts)
4104 {
4105 port = std::stoi(part);
4106 if(port < 1024 || port > 65535){
4107 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
4108 exit(99); // this isn't going to fix itself either
4109 }
4110 s_avoidUdpSourcePorts.insert(port);
4111 }
4112
4113 unsigned int currentThreadId = 1;
4114 const auto cpusMap = parseCPUMap();
4115
4116 if(g_numThreads == 1) {
4117 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
4118 #ifdef HAVE_SYSTEMD
4119 sd_notify(0, "READY=1");
4120 #endif
4121
4122 /* This thread handles the web server, carbon, statistics and the control channel */
4123 auto& handlerInfos = s_threadInfos.at(0);
4124 handlerInfos.isHandler = true;
4125 handlerInfos.thread = std::thread(recursorThread, 0, "main");
4126
4127 setCPUMap(cpusMap, currentThreadId, pthread_self());
4128
4129 auto& infos = s_threadInfos.at(currentThreadId);
4130 infos.isListener = true;
4131 infos.isWorker = true;
4132 recursorThread(currentThreadId++, "worker");
4133 }
4134 else {
4135
4136 if (g_weDistributeQueries) {
4137 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4138 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4139 auto& infos = s_threadInfos.at(currentThreadId);
4140 infos.isListener = true;
4141 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
4142
4143 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4144 }
4145 }
4146
4147 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4148
4149 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4150 auto& infos = s_threadInfos.at(currentThreadId);
4151 infos.isListener = g_weDistributeQueries ? false : true;
4152 infos.isWorker = true;
4153 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
4154
4155 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4156 }
4157
4158 #ifdef HAVE_SYSTEMD
4159 sd_notify(0, "READY=1");
4160 #endif
4161
4162 /* This thread handles the web server, carbon, statistics and the control channel */
4163 auto& infos = s_threadInfos.at(0);
4164 infos.isHandler = true;
4165 infos.thread = std::thread(recursorThread, 0, "web+stat");
4166
4167 s_threadInfos.at(0).thread.join();
4168 }
4169 return 0;
4170 }
4171
4172 static void* recursorThread(unsigned int n, const string& threadName)
4173 try
4174 {
4175 t_id=n;
4176 auto& threadInfo = s_threadInfos.at(t_id);
4177
4178 static string threadPrefix = "pdns-r/";
4179 setThreadName(threadPrefix + threadName);
4180
4181 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
4182 SyncRes::setDomainMap(g_initialDomainMap);
4183 t_allowFrom = g_initialAllowFrom;
4184 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4185 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
4186 primeHints();
4187
4188 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
4189
4190 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
4191
4192 #ifdef NOD_ENABLED
4193 if (threadInfo.isWorker)
4194 setupNODThread();
4195 #endif /* NOD_ENABLED */
4196
4197 /* the listener threads handle TCP queries */
4198 if(threadInfo.isWorker || threadInfo.isListener) {
4199 try {
4200 if(!::arg()["lua-dns-script"].empty()) {
4201 t_pdl = std::make_shared<RecursorLua4>();
4202 t_pdl->loadFile(::arg()["lua-dns-script"]);
4203 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4204 }
4205 }
4206 catch(std::exception &e) {
4207 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4208 _exit(99);
4209 }
4210 }
4211
4212 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
4213 if(ringsize) {
4214 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4215 if(g_weDistributeQueries)
4216 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
4217 else
4218 t_remotes->set_capacity(ringsize);
4219 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4220 t_servfailremotes->set_capacity(ringsize);
4221 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4222 t_bogusremotes->set_capacity(ringsize);
4223 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4224 t_largeanswerremotes->set_capacity(ringsize);
4225 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4226 t_timeouts->set_capacity(ringsize);
4227
4228 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4229 t_queryring->set_capacity(ringsize);
4230 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4231 t_servfailqueryring->set_capacity(ringsize);
4232 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4233 t_bogusqueryring->set_capacity(ringsize);
4234 }
4235
4236 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
4237 threadInfo.mt = MT.get();
4238
4239 #ifdef HAVE_PROTOBUF
4240 /* start protobuf export threads if needed */
4241 auto luaconfsLocal = g_luaconfs.getLocal();
4242 checkProtobufExport(luaconfsLocal);
4243 checkOutgoingProtobufExport(luaconfsLocal);
4244 #endif /* HAVE_PROTOBUF */
4245 #ifdef HAVE_FSTRM
4246 checkFrameStreamExport(luaconfsLocal);
4247 #endif
4248
4249 PacketID pident;
4250
4251 t_fdm=getMultiplexer();
4252
4253 if(threadInfo.isHandler) {
4254 if(::arg().mustDo("webserver")) {
4255 g_log<<Logger::Warning << "Enabling web server" << endl;
4256 try {
4257 new RecursorWebServer(t_fdm);
4258 }
4259 catch(PDNSException &e) {
4260 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
4261 exit(99);
4262 }
4263 }
4264 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
4265 }
4266 else {
4267
4268 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4269 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4270
4271 if (threadInfo.isListener) {
4272 if (g_reusePort) {
4273 /* then every listener has its own FDs */
4274 for(const auto deferred : threadInfo.deferredAdds) {
4275 t_fdm->addReadFD(deferred.first, deferred.second);
4276 }
4277 }
4278 else {
4279 /* otherwise all listeners are listening on the same ones */
4280 for(const auto deferred : g_deferredAdds) {
4281 t_fdm->addReadFD(deferred.first, deferred.second);
4282 }
4283 }
4284 }
4285 }
4286
4287 registerAllStats();
4288
4289 if(threadInfo.isHandler) {
4290 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4291 }
4292
4293 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4294
4295 bool listenOnTCP(true);
4296
4297 time_t last_stat = 0;
4298 time_t last_carbon=0, last_lua_maintenance=0;
4299 time_t carbonInterval=::arg().asNum("carbon-interval");
4300 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
4301 counter.store(0); // used to periodically execute certain tasks
4302 for(;;) {
4303 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
4304
4305 if(!(counter%500)) {
4306 MT->makeThread(houseKeeping, 0);
4307 }
4308
4309 if(!(counter%55)) {
4310 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
4311 expired_t expired=t_fdm->getTimeouts(g_now);
4312
4313 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
4314 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4315 if(g_logCommonErrors)
4316 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4317 t_fdm->removeReadFD(i->first);
4318 }
4319 }
4320
4321 counter++;
4322
4323 if(threadInfo.isHandler) {
4324 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4325 doStats();
4326 last_stat = g_now.tv_sec;
4327 }
4328
4329 Utility::gettimeofday(&g_now, 0);
4330
4331 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4332 MT->makeThread(doCarbonDump, 0);
4333 last_carbon = g_now.tv_sec;
4334 }
4335 }
4336 if (t_pdl != nullptr) {
4337 // lua-dns-script directive is present, call the maintenance callback if needed
4338 /* remember that the listener threads handle TCP queries */
4339 if (threadInfo.isWorker || threadInfo.isListener) {
4340 // Only on threads processing queries
4341 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4342 t_pdl->maintenance();
4343 last_lua_maintenance = g_now.tv_sec;
4344 }
4345 }
4346 }
4347
4348 t_fdm->run(&g_now);
4349 // 'run' updates g_now for us
4350
4351 if(threadInfo.isListener) {
4352 if(listenOnTCP) {
4353 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4354 for(const auto fd : threadInfo.tcpSockets) {
4355 t_fdm->removeReadFD(fd);
4356 }
4357 listenOnTCP=false;
4358 }
4359 }
4360 else {
4361 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4362 for(const auto fd : threadInfo.tcpSockets) {
4363 t_fdm->addReadFD(fd, handleNewTCPQuestion);
4364 }
4365 listenOnTCP=true;
4366 }
4367 }
4368 }
4369 }
4370 }
4371 catch(PDNSException &ae) {
4372 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4373 return 0;
4374 }
4375 catch(std::exception &e) {
4376 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4377 return 0;
4378 }
4379 catch(...) {
4380 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4381 return 0;
4382 }
4383
4384
4385 int main(int argc, char **argv)
4386 {
4387 g_argc = argc;
4388 g_argv = argv;
4389 g_stats.startupTime=time(0);
4390 Utility::srandom();
4391 versionSetProduct(ProductRecursor);
4392 reportBasicTypes();
4393 reportOtherTypes();
4394
4395 int ret = EXIT_SUCCESS;
4396
4397 try {
4398 ::arg().set("stack-size","stack size per mthread")="200000";
4399 ::arg().set("soa-minimum-ttl","Don't change")="0";
4400 ::arg().set("no-shuffle","Don't change")="off";
4401 ::arg().set("local-port","port to listen on")="53";
4402 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4403 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4404 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4405 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4406 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4407 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4408 ::arg().set("daemon","Operate as a daemon")="no";
4409 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4410 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4411 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4412 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4413 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4414 ::arg().set("chroot","switch to chroot jail")="";
4415 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4416 ::arg().set("setuid","If set, change user id to this uid for more security")="";
4417 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4418 ::arg().set("threads", "Launch this number of threads")="2";
4419 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4420 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4421 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4422 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4423 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4424 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4425 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4426 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4427 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4428 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4429 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
4430 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4431 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4432 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4433 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4434 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4435
4436 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4437 ::arg().set("quiet","Suppress logging of questions and answers")="";
4438 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4439 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
4440 ::arg().set("socket-owner","Owner of socket")="";
4441 ::arg().set("socket-group","Group of socket")="";
4442 ::arg().set("socket-mode", "Permissions for socket")="";
4443
4444 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
4445 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4446 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4447 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4448 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4449 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4450 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4451 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4452 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4453 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4454 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
4455 ::arg().set("hint-file", "If set, load root hints from this file")="";
4456 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4457 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4458 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4459 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4460 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4461 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4462 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4463 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4464 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4465 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4466 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
4467 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4468 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4469 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4470 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4471 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4472 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4473 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4474 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4475 ::arg().set("lua-config-file", "More powerful configuration options")="";
4476
4477 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4478 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4479 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4480 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4481 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
4482 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
4483 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4484 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
4485 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
4486 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
4487 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
4488 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4489 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
4490 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
4491 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
4492 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
4493 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
4494 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
4495 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
4496 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
4497 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
4498 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4499 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
4500 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
4501 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
4502 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
4503 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4504 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
4505 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
4506 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
4507 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
4508 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
4509 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
4510 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
4511 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
4512
4513 ::arg().set("include-dir","Include *.conf files from this directory")="";
4514 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
4515
4516 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
4517
4518 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
4519 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
4520
4521 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4522 for (size_t idx = 0; idx < 32; idx++) {
4523 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4524 }
4525 for (size_t idx = 0; idx < 128; idx++) {
4526 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4527 }
4528 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4529 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4530 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4531 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
4532
4533 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
4534 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
4535
4536 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4537
4538 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4539
4540 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
4541 ::arg().set("xpf-rr-code","XPF option code to use")="0";
4542
4543 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
4544 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4545 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
4546 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
4547 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
4548 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
4549 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="no";
4550 #ifdef NOD_ENABLED
4551 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4552 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4553 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4554 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4555 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4556 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
4557 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
4558 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4559 ::arg().set("unique-response-log", "Log unique responses")="yes";
4560 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
4561 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
4562 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
4563 #endif /* NOD_ENABLED */
4564 ::arg().setCmd("help","Provide a helpful message");
4565 ::arg().setCmd("version","Print version string");
4566 ::arg().setCmd("config","Output blank configuration");
4567 g_log.toConsole(Logger::Info);
4568 ::arg().laxParse(argc,argv); // do a lax parse
4569
4570 string configname=::arg()["config-dir"]+"/recursor.conf";
4571 if(::arg()["config-name"]!="") {
4572 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4573 s_programname+="-"+::arg()["config-name"];
4574 }
4575 cleanSlashes(configname);
4576
4577 if(!::arg().getCommands().empty()) {
4578 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4579 exit(99);
4580 }
4581
4582 if(::arg().mustDo("config")) {
4583 cout<<::arg().configstring()<<endl;
4584 exit(0);
4585 }
4586
4587 if(!::arg().file(configname.c_str()))
4588 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
4589
4590 ::arg().parse(argc,argv);
4591
4592 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4593 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
4594 exit(EXIT_FAILURE);
4595 }
4596
4597 if (::arg()["socket-dir"].empty()) {
4598 if (::arg()["chroot"].empty())
4599 ::arg().set("socket-dir") = LOCALSTATEDIR;
4600 else
4601 ::arg().set("socket-dir") = "/";
4602 }
4603
4604 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
4605
4606 if(::arg().asNum("threads")==1) {
4607 if (::arg().mustDo("pdns-distributes-queries")) {
4608 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4609 ::arg().set("pdns-distributes-queries")="no";
4610 }
4611 }
4612
4613 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4614 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4615 ::arg().set("distributor-threads")="1";
4616 }
4617
4618 if (!::arg().mustDo("pdns-distributes-queries")) {
4619 ::arg().set("distributor-threads")="0";
4620 }
4621
4622 if(::arg().mustDo("help")) {
4623 cout<<"syntax:"<<endl<<endl;
4624 cout<<::arg().helpstring(::arg()["help"])<<endl;
4625 exit(0);
4626 }
4627 if(::arg().mustDo("version")) {
4628 showProductVersion();
4629 showBuildConfiguration();
4630 exit(0);
4631 }
4632
4633 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
4634
4635 if (logUrgency < Logger::Error)
4636 logUrgency = Logger::Error;
4637 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4638 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4639 }
4640 g_log.setLoglevel(logUrgency);
4641 g_log.toConsole(logUrgency);
4642
4643 serviceMain(argc, argv);
4644 }
4645 catch(PDNSException &ae) {
4646 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4647 ret=EXIT_FAILURE;
4648 }
4649 catch(std::exception &e) {
4650 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4651 ret=EXIT_FAILURE;
4652 }
4653 catch(...) {
4654 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4655 ret=EXIT_FAILURE;
4656 }
4657
4658 return ret;
4659 }