]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
wip2
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
31 #endif
32 #include "ws-recursor.hh"
33 #include <thread>
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
36 #include "utility.hh"
37 #include "dns_random.hh"
38 #ifdef HAVE_LIBSODIUM
39 #include <sodium.h>
40 #endif
41 #include "opensslsigners.hh"
42 #include <iostream>
43 #include <errno.h>
44 #include <boost/static_assert.hpp>
45 #include <map>
46 #include <set>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
49 #include <stdio.h>
50 #include <signal.h>
51 #include <stdlib.h>
52 #include "misc.hh"
53 #include "mtasker.hh"
54 #include <utility>
55 #include "arguments.hh"
56 #include "syncres.hh"
57 #include <fcntl.h>
58 #include <fstream>
59 #include "sortlist.hh"
60 #include "sstuff.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
66 #ifdef MALLOC_TRACE
67 #include "malloctrace.hh"
68 #endif
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
76 #include "logger.hh"
77 #include "iputils.hh"
78 #include "mplexer.hh"
79 #include "config.h"
80 #include "lua-recursor4.hh"
81 #include "version.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
84 #include "dnsname.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
90 #include "gettime.hh"
91 #include "pubsuffix.hh"
92 #ifdef NOD_ENABLED
93 #include "nod.hh"
94 #endif /* NOD_ENABLED */
95
96 #include "rec-protobuf.hh"
97 #include "rec-snmp.hh"
98
99 #ifdef HAVE_SYSTEMD
100 #include <systemd/sd-daemon.h>
101 #endif
102
103 #include "namespaces.hh"
104
105 #ifdef HAVE_PROTOBUF
106 #include "uuid-utils.hh"
107 #endif /* HAVE_PROTOBUF */
108
109 #include "xpf.hh"
110
/* Per-client TCP connection counter, keyed on the address only (the
   comparator is ComboAddress::addressOnlyLessThan). */
typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;

/* State private to this translation unit, one instance per thread. */
static thread_local std::shared_ptr<RecursorLua4> t_pdl;
static thread_local unsigned int t_id = 0; // returned by getRecursorThreadId()
static thread_local std::shared_ptr<Regex> t_traceRegex;
static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts; // updated by the TCPConnection ctor/dtor
#ifdef HAVE_PROTOBUF
/* Protobuf loggers for this thread; the *Generation counters are compared
   with the Lua configuration generation to detect a stale set of servers. */
static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
static thread_local uint64_t t_protobufServersGeneration;
static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
static thread_local uint64_t t_outgoingProtobufServersGeneration;
#endif /* HAVE_PROTOBUF */

#ifdef HAVE_FSTRM
/* dnstap (framestream) loggers for this thread, plus the matching generation counter */
static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
static thread_local uint64_t t_frameStreamServersGeneration;
#endif /* HAVE_FSTRM */

thread_local std::unique_ptr<MT_t> MT; // the big MTasker
std::unique_ptr<MemRecursorCache> s_RC = std::unique_ptr<MemRecursorCache>(new MemRecursorCache());


thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
thread_local FDMultiplexer* t_fdm{nullptr}; // multiplexer on which this thread registers its read/write FDs
/* ring buffers consulted by updateResponseStats(); each may be unset */
thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
#ifdef NOD_ENABLED
thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
#endif /* NOD_ENABLED */
__thread struct timeval g_now; // timestamp, updated (too) frequently
143
/* List of (int, callback) pairs; judging by the RecThreadInfo comments the int
   is a listening socket FD whose registration is deferred -- TODO confirm. */
typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
145
// for communicating with our threads
// effectively readonly after startup
struct RecThreadInfo
{
  /* File descriptors of the pipes associated with one thread.
     Every descriptor stays at -1 until it is assigned. */
  struct ThreadPipeSet
  {
    int writeToThread{-1};
    int readToThread{-1};
    int writeFromThread{-1};
    int readFromThread{-1};
    int writeQueriesToThread{-1}; // this one is non-blocking
    int readQueriesToThread{-1};
  };

  /* FD corresponding to TCP sockets this thread is listening
     on.
     These FDs are also in deferredAdds when we have one
     socket per listener, and in g_deferredAdds otherwise. */
  std::set<int> tcpSockets;
  /* FD corresponding to listening sockets if we have one socket per
     listener (with reuseport), otherwise all listeners share the
     same FD and g_deferredAdds is then used instead */
  deferredAdd_t deferredAdds;
  struct ThreadPipeSet pipes;
  std::thread thread;
  /* non-owning pointer, nullptr until set */
  MT_t* mt{nullptr};
  uint64_t numberOfDistributedQueries{0};
  /* handle the web server, carbon, statistics and the control channel */
  bool isHandler{false};
  /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
  bool isListener{false};
  /* process queries */
  bool isWorker{false};
};
180
/* first we have the handler thread, t_id == 0 (some other
   helper threads like SNMP might have t_id == 0 as well)
   then the distributor threads if any
   and finally the workers.
   Indexed by t_id, see isDistributorThread() and isHandlerThread(). */
static std::vector<RecThreadInfo> s_threadInfos;
/* without reuseport, all listeners share the same sockets */
static deferredAdd_t g_deferredAdds;
188
typedef vector<int> tcpListenSockets_t;
typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now

/* wildcard addresses, used by getQueryLocalAddress() when no explicit
   query-local address is configured for the family */
static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
/* candidate source addresses for outgoing queries, one is picked at random by getQueryLocalAddress() */
static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
static AtomicCounter counter;
static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
static NetmaskGroup g_XPFAcl;
static size_t g_tcpMaxQueriesPerConn;
static size_t s_maxUDPQueriesPerRound;
static uint64_t g_latencyStatSize;
static uint32_t g_disthashseed;
static unsigned int g_maxTCPPerClient;
static unsigned int g_maxMThreads;
static unsigned int g_numDistributorThreads;
static unsigned int g_numWorkerThreads;
static int g_tcpTimeout;
static uint16_t g_udpTruncationThreshold;
static uint16_t g_xpfRRCode{0};
static std::atomic<bool> statsWanted;
static std::atomic<bool> g_quiet;
static bool g_logCommonErrors;
static bool g_anyToTcp;
static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
static bool g_reusePort{false};
static bool g_gettagNeedsEDNSOptions{false};
static time_t g_statisticsInterval;
static bool g_useIncomingECS;
static bool g_useKernelTimestamp;
std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
#ifdef NOD_ENABLED
static bool g_nodEnabled;
static DNSName g_nodLookupDomain;
static bool g_nodLog;
static SuffixMatchNode g_nodDomainWL;
static std::string g_nod_pbtag;
static bool g_udrEnabled;
static bool g_udrLog;
static std::string g_udr_pbtag;
#endif /* NOD_ENABLED */
/* source ports that UDPClientSocks::makeClientSocket() must never pick */
#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
#else
static std::set<uint16_t> s_avoidUdpSourcePorts;
#endif
/* inclusive range from which random UDP source ports are drawn */
static uint16_t s_minUdpSourcePort;
static uint16_t s_maxUdpSourcePort;
static double s_balancingFactor;
240
RecursorControlChannel s_rcc; // only active in the handler thread
RecursorStats g_stats;
string s_programname="pdns_recursor";
string s_pidfname; // path written to by writePid()
bool g_lowercaseOutgoing;
unsigned int g_networkTimeoutMsec; // timeout handed to MT->waitEvent() by the asend*/arecv* helpers
unsigned int g_numThreads;
uint16_t g_outgoingEDNSBufsize;
bool g_logRPZChanges{false};

// Used in the Syncres to not throttle certain servers
GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
254
// Loopback, private, CGN and link-local ranges, as a comma separated list
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// The same ranges as LOCAL_NETS, each one prefixed with '!'
#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Concatenation of LOCAL_NETS and BAD_NETS
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
264
//! used to send information to a newborn mthread
struct DNSComboWriter {
  /* Parses `query` into d_mdp and keeps a copy of the raw packet in d_query. */
  DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
  {
  }

  /* Same as above, additionally taking ownership of the policy tags,
     the Lua data object and the records passed in. */
  DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data, std::vector<DNSRecord>&& records): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_records(std::move(records)), d_data(std::move(data))
  {
  }

  void setRemote(const ComboAddress& sa)
  {
    d_remote=sa;
  }

  void setSource(const ComboAddress& sa)
  {
    d_source=sa;
  }

  void setLocal(const ComboAddress& sa)
  {
    d_local=sa;
  }

  void setDestination(const ComboAddress& sa)
  {
    d_destination=sa;
  }

  void setSocket(int sock)
  {
    d_socket=sock;
  }

  /* Printable form of the client address; when d_source and d_remote
     differ the proxy address is appended as well. */
  string getRemote() const
  {
    if (d_source == d_remote) {
      return d_source.toStringWithPort();
    }
    return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
  }

  MOADNSParser d_mdp;
  struct timeval d_now;
  /* Remote client, might differ from d_source
     in case of XPF, in which case d_source holds
     the IP of the client and d_remote of the proxy
  */
  ComboAddress d_remote;
  ComboAddress d_source;
  /* Destination address, might differ from
     d_destination in case of XPF, in which case
     d_destination holds the IP of the proxy and
     d_local holds our own. */
  ComboAddress d_local;
  ComboAddress d_destination;
#ifdef HAVE_PROTOBUF
  boost::uuids::uuid d_uuid;
  string d_requestorId;
  string d_deviceId;
  string d_deviceName;
  struct timeval d_kernelTimestamp{0,0};
#endif
  std::string d_query; // copy of the raw query handed to the constructor
  std::vector<std::string> d_policyTags;
  std::vector<DNSRecord> d_records;
  LuaContext::LuaObject d_data;
  EDNSSubnetOpts d_ednssubnet;
  shared_ptr<TCPConnection> d_tcpConnection;
  boost::optional<int> d_rcode{boost::none};
  int d_socket{-1};
  unsigned int d_tag{0};
  uint32_t d_qhash{0};
  uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()}; // defaults to "no cap"
  uint16_t d_ecsBegin{0};
  uint16_t d_ecsEnd{0};
  bool d_variable{false};
  bool d_ecsFound{false};
  bool d_ecsParsed{false};
  bool d_followCNAMERecords{false};
  bool d_logResponse{false};
  bool d_tcp{false};
};
349
350 MT_t* getMT()
351 {
352 return MT ? MT.get() : nullptr;
353 }
354
355 ArgvMap &arg()
356 {
357 static ArgvMap theArg;
358 return theArg;
359 }
360
361 unsigned int getRecursorThreadId()
362 {
363 return t_id;
364 }
365
366 int getMTaskerTID()
367 {
368 return MT->getTid();
369 }
370
371 static bool isDistributorThread()
372 {
373 if (t_id == 0) {
374 return false;
375 }
376
377 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
378 }
379
380 static bool isHandlerThread()
381 {
382 if (t_id == 0) {
383 return true;
384 }
385
386 return s_threadInfos.at(t_id).isHandler;
387 }
388
389 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
390
391 // -1 is error, 0 is timeout, 1 is success
392 int asendtcp(const string& data, Socket* sock)
393 {
394 PacketID pident;
395 pident.sock=sock;
396 pident.outMSG=data;
397
398 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
399 string packet;
400
401 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
402
403 if(!ret || ret==-1) { // timeout
404 t_fdm->removeWriteFD(sock->getHandle());
405 }
406 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
407 return -1;
408 }
409 return ret;
410 }
411
412 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
413
414 // -1 is error, 0 is timeout, 1 is success
415 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
416 {
417 data.clear();
418 PacketID pident;
419 pident.sock=sock;
420 pident.inNeeded=len;
421 pident.inIncompleteOkay=incompleteOkay;
422 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
423
424 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
425 if(!ret || ret==-1) { // timeout
426 t_fdm->removeReadFD(sock->getHandle());
427 }
428 else if(data.empty()) {// error, EOF or other
429 return -1;
430 }
431
432 return ret;
433 }
434
435 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
436 {
437 PacketID pident=*any_cast<PacketID>(&var);
438 char resp[512];
439 ComboAddress fromaddr;
440 socklen_t addrlen=sizeof(fromaddr);
441
442 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
443 if (fromaddr != pident.remote) {
444 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
445
446 }
447
448 t_fdm->removeReadFD(fd);
449 if(ret >= 0) {
450 string data(resp, (size_t) ret);
451 MT->sendEvent(pident, &data);
452 }
453 else {
454 string empty;
455 MT->sendEvent(pident, &empty);
456 // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
457 }
458 }
459 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
460 {
461 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
462 s.setNonBlocking();
463 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
464
465 s.bind(local);
466 s.connect(dest);
467 s.send(query);
468
469 PacketID pident;
470 pident.sock=&s;
471 pident.remote=dest;
472 pident.type=0;
473 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
474
475 string data;
476
477 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
478
479 if(!ret || ret==-1) { // timeout
480 t_fdm->removeReadFD(s.getHandle());
481 }
482 else if(data.empty()) {// error, EOF or other
483 // we could special case this
484 return data;
485 }
486 return data;
487 }
488
489 //! pick a random query local address
490 ComboAddress getQueryLocalAddress(int family, uint16_t port)
491 {
492 ComboAddress ret;
493 if(family==AF_INET) {
494 if(g_localQueryAddresses4.empty())
495 ret = g_local4;
496 else
497 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
498 ret.sin4.sin_port = htons(port);
499 }
500 else {
501 if(g_localQueryAddresses6.empty())
502 ret = g_local6;
503 else
504 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
505
506 ret.sin6.sin6_port = htons(port);
507 }
508 return ret;
509 }
510
511 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
512
513 static void setSocketBuffer(int fd, int optname, uint32_t size)
514 {
515 uint32_t psize=0;
516 socklen_t len=sizeof(psize);
517
518 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
519 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
520 return;
521 }
522
523 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0) {
524 int err = errno;
525 g_log << Logger::Error << "Unable to raise socket buffer size to " << size << ": " << stringerror(err) << endl;
526 }
527 }
528
529
530 static void setSocketReceiveBuffer(int fd, uint32_t size)
531 {
532 setSocketBuffer(fd, SO_RCVBUF, size);
533 }
534
535 static void setSocketSendBuffer(int fd, uint32_t size)
536 {
537 setSocketBuffer(fd, SO_SNDBUF, size);
538 }
539
540
541 // you can ask this class for a UDP socket to send a query from
542 // this socket is not yours, don't even think about deleting it
543 // but after you call 'returnSocket' on it, don't assume anything anymore
544 class UDPClientSocks
545 {
546 unsigned int d_numsocks;
547 public:
548 UDPClientSocks() : d_numsocks(0)
549 {
550 }
551
552 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
553 int getSocket(const ComboAddress& toaddr, int* fd)
554 {
555 *fd=makeClientSocket(toaddr.sin4.sin_family);
556 if(*fd < 0) // temporary error - receive exception otherwise
557 return -2;
558
559 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
560 int err = errno;
561 try {
562 closesocket(*fd);
563 }
564 catch(const PDNSException& e) {
565 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
566 }
567
568 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
569 return -2;
570 return -1;
571 }
572
573 d_numsocks++;
574 return 0;
575 }
576
577 // return a socket to the pool, or simply erase it
578 void returnSocket(int fd)
579 {
580 try {
581 t_fdm->removeReadFD(fd);
582 }
583 catch(const FDMultiplexerException& e) {
584 // we sometimes return a socket that has not yet been assigned to t_fdm
585 }
586
587 try {
588 closesocket(fd);
589 }
590 catch(const PDNSException& e) {
591 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
592 }
593
594 --d_numsocks;
595 }
596
597 private:
598
599 // returns -1 for errors which might go away, throws for ones that won't
600 static int makeClientSocket(int family)
601 {
602 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
603
604 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
605 return ret;
606
607 if(ret<0)
608 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
609
610 // setCloseOnExec(ret); // we're not going to exec
611
612 int tries=10;
613 ComboAddress sin;
614 while(--tries) {
615 uint16_t port;
616
617 if(tries==1) // fall back to kernel 'random'
618 port = 0;
619 else {
620 do {
621 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
622 }
623 while (s_avoidUdpSourcePorts.count(port));
624 }
625
626 sin=getQueryLocalAddress(family, port); // does htons for us
627
628 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
629 break;
630 }
631
632 if(!tries) {
633 closesocket(ret);
634 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
635 }
636
637 try {
638 setReceiveSocketErrors(ret, family);
639 setNonBlocking(ret);
640 }
641 catch(...) {
642 closesocket(ret);
643 throw;
644 }
645
646 return ret;
647 }
648 };
649
/* Per-thread source of outgoing UDP query sockets, used by asendto() and arecvfrom(). */
static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
651
652 /* these two functions are used by LWRes */
653 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
654 int asendto(const char *data, size_t len, int flags,
655 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
656 {
657
658 PacketID pident;
659 pident.domain = domain;
660 pident.remote = toaddr;
661 pident.type = qtype;
662
663 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
664 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
665
666 for(; chain.first != chain.second; chain.first++) {
667 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
668 /*
669 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
670 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
671 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
672 */
673 chain.first->key.chain.insert(id); // we can chain
674 *fd=-1; // gets used in waitEvent / sendEvent later on
675 return 1;
676 }
677 }
678
679 int ret=t_udpclientsocks->getSocket(toaddr, fd);
680 if(ret < 0)
681 return ret;
682
683 pident.fd=*fd;
684 pident.id=id;
685
686 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
687 ret = send(*fd, data, len, 0);
688
689 int tmp = errno;
690
691 if(ret < 0)
692 t_udpclientsocks->returnSocket(*fd);
693
694 errno = tmp; // this is for logging purposes only
695 return ret;
696 }
697
698 // -1 is error, 0 is timeout, 1 is success
699 int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
700 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
701 {
702 static optional<unsigned int> nearMissLimit;
703 if(!nearMissLimit)
704 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
705
706 PacketID pident;
707 pident.fd=fd;
708 pident.id=id;
709 pident.domain=domain;
710 pident.type = qtype;
711 pident.remote=fromaddr;
712
713 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
714
715 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
716 if(ret > 0) {
717 /* handleUDPServerResponse() will close the socket for us no matter what */
718 if(packet.empty()) // means "error"
719 return -1;
720
721 *d_len=packet.size();
722
723 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
724 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
725 g_stats.spoofCount++;
726 return -1;
727 }
728 }
729 else {
730 /* getting there means error or timeout, it's up to us to close the socket */
731 if(fd >= 0)
732 t_udpclientsocks->returnSocket(fd);
733 }
734 return ret;
735 }
736
737 static void writePid(void)
738 {
739 if(!::arg().mustDo("write-pid"))
740 return;
741 ofstream of(s_pidfname.c_str(), std::ios_base::app);
742 if(of)
743 of<< Utility::getpid() <<endl;
744 else {
745 int err = errno;
746 g_log << Logger::Error << "Writing pid for " << Utility::getpid() << " to " << s_pidfname << " failed: "
747 << stringerror(err) << endl;
748 }
749 }
750
/* Storage for the static member; its value is assigned elsewhere (not visible here). */
uint16_t TCPConnection::s_maxInFlight;
752
753 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
754 {
755 ++s_currentConnections;
756 (*t_tcpClientCounts)[d_remote]++;
757 }
758
759 TCPConnection::~TCPConnection()
760 {
761 try {
762 if(closesocket(d_fd) < 0)
763 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
764 }
765 catch(const PDNSException& e) {
766 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
767 }
768
769 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
770 t_tcpClientCounts->erase(d_remote);
771 --s_currentConnections;
772 }
773
/* Number of live TCPConnection objects: incremented by the constructor, decremented by the destructor. */
AtomicCounter TCPConnection::s_currentConnections;
775
776 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
777
778 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
779 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
780 {
781 if(packetsize > 1000 && t_largeanswerremotes)
782 t_largeanswerremotes->push_back(remote);
783 switch(res) {
784 case RCode::ServFail:
785 if(t_servfailremotes) {
786 t_servfailremotes->push_back(remote);
787 if(query && t_servfailqueryring) // packet cache
788 t_servfailqueryring->push_back(make_pair(*query, qtype));
789 }
790 g_stats.servFails++;
791 break;
792 case RCode::NXDomain:
793 g_stats.nxDomains++;
794 break;
795 case RCode::NoError:
796 g_stats.noErrors++;
797 break;
798 }
799 }
800
801 static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
802 try
803 {
804 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
805 }
806 catch(...)
807 {
808 return "Exception making error message for exception";
809 }
810
811 #ifdef HAVE_PROTOBUF
812 static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId, const std::string& deviceName)
813 {
814 if (!t_protobufServers) {
815 return;
816 }
817
818 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
819 ComboAddress requestor = requestorNM.getMaskedNetwork();
820 requestor.setPort(remote.getPort());
821 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
822 message.setServerIdentity(SyncRes::s_serverID);
823 message.setEDNSSubnet(ednssubnet, ednssubnet.isIPv4() ? maskV4 : maskV6);
824 message.setRequestorId(requestorId);
825 message.setDeviceId(deviceId);
826 message.setDeviceName(deviceName);
827
828 if (!policyTags.empty()) {
829 message.setPolicyTags(policyTags);
830 }
831
832 // cerr <<message.toDebugString()<<endl;
833 std::string str;
834 message.serialize(str);
835
836 for (auto& server : *t_protobufServers) {
837 server->queueData(str);
838 }
839 }
840
841 static void protobufLogResponse(const RecProtoBufMessage& message)
842 {
843 if (!t_protobufServers) {
844 return;
845 }
846
847 // cerr <<message.toDebugString()<<endl;
848 std::string str;
849 message.serialize(str);
850
851 for (auto& server : *t_protobufServers) {
852 server->queueData(str);
853 }
854 }
855 #endif
856
857 /**
858 * Chases the CNAME provided by the PolicyCustom RPZ policy.
859 *
860 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
861 * @param qtype: The QType of the original query
862 * @param sr: A SyncRes
863 * @param res: An integer that will contain the RCODE of the lookup we do
864 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
865 */
866 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
867 {
868 if (spoofed.d_type == QType::CNAME) {
869 bool oldWantsRPZ = sr.getWantsRPZ();
870 sr.setWantsRPZ(false);
871 vector<DNSRecord> ans;
872 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
873 for (const auto& rec : ans) {
874 if(rec.d_place == DNSResourceRecord::ANSWER) {
875 ret.push_back(rec);
876 }
877 }
878 // Reset the RPZ state of the SyncRes
879 sr.setWantsRPZ(oldWantsRPZ);
880 }
881 }
882
883 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
884 {
885 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
886
887 if(rec.d_type != QType::OPT) // their TTL ain't real
888 minTTL = min(minTTL, rec.d_ttl);
889
890 rec.d_content->toPacket(pw);
891 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
892 pw.rollback();
893 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
894 pw.getHeader()->tc=1;
895 pw.truncate();
896 }
897 return false;
898 }
899
900 return true;
901 }
902
903 #ifdef HAVE_PROTOBUF
904 static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
905 {
906 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
907
908 for (const auto& server : config.servers) {
909 try {
910 auto logger = make_unique<RemoteLogger>(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
911 logger->setLogQueries(config.logQueries);
912 logger->setLogResponses(config.logResponses);
913 result->emplace_back(std::move(logger));
914 }
915 catch(const std::exception& e) {
916 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
917 }
918 catch(const PDNSException& e) {
919 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
920 }
921 }
922
923 return result;
924 }
925
926 static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
927 {
928 if (!luaconfsLocal->protobufExportConfig.enabled) {
929 if (t_protobufServers) {
930 for (auto& server : *t_protobufServers) {
931 server->stop();
932 }
933 t_protobufServers.reset();
934 }
935
936 return false;
937 }
938
939 /* if the server was not running, or if it was running according to a
940 previous configuration */
941 if (!t_protobufServers ||
942 t_protobufServersGeneration < luaconfsLocal->generation) {
943
944 if (t_protobufServers) {
945 for (auto& server : *t_protobufServers) {
946 server->stop();
947 }
948 }
949 t_protobufServers.reset();
950
951 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
952 t_protobufServersGeneration = luaconfsLocal->generation;
953 }
954
955 return true;
956 }
957
958 static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
959 {
960 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
961 if (t_outgoingProtobufServers) {
962 for (auto& server : *t_outgoingProtobufServers) {
963 server->stop();
964 }
965 }
966 t_outgoingProtobufServers.reset();
967
968 return false;
969 }
970
971 /* if the server was not running, or if it was running according to a
972 previous configuration */
973 if (!t_outgoingProtobufServers ||
974 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
975
976 if (t_outgoingProtobufServers) {
977 for (auto& server : *t_outgoingProtobufServers) {
978 server->stop();
979 }
980 }
981 t_outgoingProtobufServers.reset();
982
983 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
984 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
985 }
986
987 return true;
988 }
989
990 #ifdef HAVE_FSTRM
991
992 static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
993 {
994 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
995
996 for (const auto& server : config.servers) {
997 try {
998 std::unordered_map<string,unsigned> options;
999 options["bufferHint"] = config.bufferHint;
1000 options["flushTimeout"] = config.flushTimeout;
1001 options["inputQueueSize"] = config.inputQueueSize;
1002 options["outputQueueSize"] = config.outputQueueSize;
1003 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
1004 options["reopenInterval"] = config.reopenInterval;
1005 FrameStreamLogger *fsl = nullptr;
1006 try {
1007 ComboAddress address(server);
1008 fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
1009 }
1010 catch (const PDNSException& e) {
1011 fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
1012 }
1013 fsl->setLogQueries(config.logQueries);
1014 fsl->setLogResponses(config.logResponses);
1015 result->emplace_back(fsl);
1016 }
1017 catch(const std::exception& e) {
1018 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
1019 }
1020 catch(const PDNSException& e) {
1021 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
1022 }
1023 }
1024
1025 return result;
1026 }
1027
1028 static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1029 {
1030 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1031 if (t_frameStreamServers) {
1032 // dt's take care of cleanup
1033 t_frameStreamServers.reset();
1034 }
1035
1036 return false;
1037 }
1038
1039 /* if the server was not running, or if it was running according to a
1040 previous configuration */
1041 if (!t_frameStreamServers ||
1042 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1043
1044 if (t_frameStreamServers) {
1045 // dt's take care of cleanup
1046 t_frameStreamServers.reset();
1047 }
1048
1049 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1050 t_frameStreamServersGeneration = luaconfsLocal->generation;
1051 }
1052
1053 return true;
1054 }
1055 #endif /* HAVE_FSTRM */
1056 #endif /* HAVE_PROTOBUF */
1057
1058 #ifdef NOD_ENABLED
1059 static bool nodCheckNewDomain(const DNSName& dname)
1060 {
1061 static const QType qt(QType::A);
1062 static const uint16_t qc(QClass::IN);
1063 bool ret = false;
1064 // First check the (sub)domain isn't whitelisted for NOD purposes
1065 if (!g_nodDomainWL.check(dname)) {
1066 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
1067 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1068 if (g_nodLog) {
1069 // This should probably log to a dedicated log file
1070 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
1071 }
1072 if (!(g_nodLookupDomain.isRoot())) {
1073 // Send a DNS A query to <domain>.g_nodLookupDomain
1074 DNSName qname = dname;
1075 vector<DNSRecord> dummy;
1076 qname += g_nodLookupDomain;
1077 directResolve(qname, qt, qc, dummy);
1078 }
1079 ret = true;
1080 }
1081 }
1082 return ret;
1083 }
1084
1085 static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1086 {
1087 bool ret = false;
1088 if (record.d_place == DNSResourceRecord::ANSWER ||
1089 record.d_place == DNSResourceRecord::ADDITIONAL) {
1090 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1091 std::stringstream ss;
1092 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1093 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
1094 if (g_udrLog) {
1095 // This should also probably log to a dedicated file.
1096 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
1097 }
1098 ret = true;
1099 }
1100 }
1101 return ret;
1102 }
1103 #endif /* NOD_ENABLED */
1104
1105 int followCNAMERecords(vector<DNSRecord>& ret, const QType& qtype)
1106 {
1107 vector<DNSRecord> resolved;
1108 DNSName target;
1109 for(const DNSRecord& rr : ret) {
1110 if(rr.d_type == QType::CNAME) {
1111 auto rec = getRR<CNAMERecordContent>(rr);
1112 if(rec) {
1113 target=rec->getTarget();
1114 break;
1115 }
1116 }
1117 }
1118
1119 if(target.empty()) {
1120 return 0;
1121 }
1122
1123 int rcode = directResolve(target, qtype, QClass::IN, resolved);
1124
1125 for(DNSRecord& rr : resolved) {
1126 ret.push_back(std::move(rr));
1127 }
1128 return rcode;
1129 }
1130
// Handles one incoming query, described by the DNSComboWriter passed in p, from
// option parsing to sending the response.
//
// p is reinterpreted as a DNSComboWriter* and owned by a unique_ptr from here on,
// so it is deleted when this function returns.
//
// Outline:
//   1. read the EDNS options of the query (payload size, ECS, NSID),
//   2. prepare the protobuf message (if enabled), the response packet and the
//      SyncRes object,
//   3. let the Lua hooks and RPZ policies act before and after resolution,
//   4. apply the validation state, write the records and the OPT RR,
//   5. send the packet over UDP (possibly inserting it in the packet cache) or TCP,
//   6. log and update statistics.
//
// Policy "Drop" results return early without sending anything.
// Exceptions thrown inside the main try block are caught and logged below.
1131 static void startDoResolve(void *p)
1132 {
1133 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
1134 try {
1135 if (t_queryring)
1136 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1137
// Answer size limit: 65535 for TCP, otherwise the smaller of 512 and
// g_udpTruncationThreshold; recomputed below when the query carries EDNS.
1138 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
1139 EDNSOpts edo;
1140 std::vector<pair<uint16_t, string> > ednsOpts;
1141 bool variableAnswer = dc->d_variable;
1142 bool haveEDNS=false;
1143 #ifdef NOD_ENABLED
1144 bool hasUDR = false;
1145 #endif /* NOD_ENABLED */
1146 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1147 uint8_t ednsExtRCode = 0;
1148 if(getEDNSOpts(dc->d_mdp, &edo)) {
1149 haveEDNS=true;
1150 if (edo.d_version != 0) {
1151 ednsExtRCode = ERCode::BADVERS;
1152 }
1153
1154 if(!dc->d_tcp) {
1155 /* rfc6891 6.2.3:
1156 "Values lower than 512 MUST be treated as equal to 512."
1157 */
1158 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1159 }
1160 ednsOpts = edo.d_options;
1161 maxanswersize -= 11; // EDNS header size
1162
1163 for (const auto& o : edo.d_options) {
1164 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1165 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1166 } else if (o.first == EDNSOptionCode::NSID) {
1167 const static string mode_server_id = ::arg()["server-id"];
1168 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1169 maxanswersize > (2 + 2 + mode_server_id.size())) {
1170 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1171 variableAnswer = true; // Can't packetcache an answer with NSID
1172 // Option Code and Option Length are both 2
1173 maxanswersize -= 2 + 2 + mode_server_id.size();
1174 }
1175 }
1176 }
1177 }
1178 /* perhaps there was no EDNS or no ECS but by now we looked */
1179 dc->d_ecsParsed = true;
1180 vector<DNSRecord> ret;
1181 vector<uint8_t> packet;
1182
1183 auto luaconfsLocal = g_luaconfs.getLocal();
1184 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1185 bool wantsRPZ(true);
1186 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
1187 #ifdef HAVE_PROTOBUF
1188 if (checkProtobufExport(luaconfsLocal)) {
1189 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1190 ComboAddress requestor = requestorNM.getMaskedNetwork();
1191 requestor.setPort(dc->d_source.getPort());
1192 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
1193 pbMessage->setServerIdentity(SyncRes::s_serverID);
1194 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1195 }
1196 #endif /* HAVE_PROTOBUF */
1197
1198 #ifdef HAVE_FSTRM
1199 checkFrameStreamExport(luaconfsLocal);
1200 #endif
1201
// Start the response packet: same question as the query, id/rd/cd copied from
// the query header, qr and ra set, aa and tc cleared.
1202 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
1203
1204 pw.getHeader()->aa=0;
1205 pw.getHeader()->ra=1;
1206 pw.getHeader()->qr=1;
1207 pw.getHeader()->tc=0;
1208 pw.getHeader()->id=dc->d_mdp.d_header.id;
1209 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
1210 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
1211
1212 /* This is the lowest TTL seen in the records of the response,
1213 so we can't cache it for longer than this value.
1214 If we have a TTL cap, this value can't be larger than the
1215 cap no matter what. */
1216 uint32_t minTTL = dc->d_ttlCap;
1217
1218 SyncRes sr(dc->d_now);
1219 sr.setId(MT->getTid());
1220
1221 bool DNSSECOK=false;
1222 if(t_pdl) {
1223 sr.setLuaEngine(t_pdl);
1224 }
1225 if(g_dnssecmode != DNSSECMode::Off) {
1226 sr.setDoDNSSEC(true);
1227
1228 // Does the requestor want DNSSEC records?
1229 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
1230 DNSSECOK=true;
1231 g_stats.dnssecQueries++;
1232 }
1233 if (dc->d_mdp.d_header.cd) {
1234 /* Per rfc6840 section 5.9, "When processing a request with
1235 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1236 to return all response data, even data that has failed DNSSEC
1237 validation. */
1238 ++g_stats.dnssecCheckDisabledQueries;
1239 }
1240 if (dc->d_mdp.d_header.ad) {
1241 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1242 indicating that the requester understands and is interested in the
1243 value of the AD bit in the response. This allows a requester to
1244 indicate that it understands the AD bit without also requesting
1245 DNSSEC data via the DO bit. */
1246 ++g_stats.dnssecAuthenticDataQueries;
1247 }
1248 } else {
1249 // Ignore the client-set CD flag
1250 pw.getHeader()->cd=0;
1251 }
1252 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1253
1254 #ifdef HAVE_PROTOBUF
1255 sr.setInitialRequestId(dc->d_uuid);
1256 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
1257 #endif
1258 #ifdef HAVE_FSTRM
1259 sr.setFrameStreamServers(t_frameStreamServers);
1260 #endif
1261 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
1262
1263 bool tracedQuery=false; // we could consider letting Lua know about this too
1264 bool shouldNotValidate = false;
1265
1266 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1267 int res = RCode::NoError;
1268
1269 DNSFilterEngine::Policy appliedPolicy;
1270 std::vector<DNSRecord> spoofed;
1271 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, dc->d_logResponse);
1272 dq.ednsFlags = &edo.d_extFlags;
1273 dq.ednsOptions = &ednsOpts;
1274 dq.tag = dc->d_tag;
1275 dq.discardedPolicies = &sr.d_discardedPolicies;
1276 dq.policyTags = &dc->d_policyTags;
1277 dq.appliedPolicy = &appliedPolicy;
1278 dq.currentRecords = &ret;
1279 dq.dh = &dc->d_mdp.d_header;
1280 dq.data = dc->d_data;
1281 #ifdef HAVE_PROTOBUF
1282 dq.requestorId = dc->d_requestorId;
1283 dq.deviceId = dc->d_deviceId;
1284 dq.deviceName = dc->d_deviceName;
1285 #endif
1286
// A non-zero extended rcode was set above for an EDNS version other than 0
// (BADVERS): skip resolution entirely and go straight to sending.
1287 if(ednsExtRCode != 0) {
1288 goto sendit;
1289 }
1290
1291 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
1292 pw.getHeader()->tc = 1;
1293 res = 0;
1294 variableAnswer = true;
1295 goto sendit;
1296 }
1297
1298 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
1299 sr.setLogMode(SyncRes::Store);
1300 tracedQuery=true;
1301 }
1302
1303 if(!g_quiet || tracedQuery) {
1304 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
1305 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
1306 if(!dc->d_ednssubnet.source.empty()) {
1307 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
1308 }
1309 g_log<<endl;
1310 }
1311
1312 if(!dc->d_mdp.d_header.rd) {
1313 sr.setCacheOnly();
1314 }
1315
1316 if (dc->d_rcode != boost::none) {
1317 /* we have a response ready to go, most likely from gettag_ffi */
1318 ret = std::move(dc->d_records);
1319 res = *dc->d_rcode;
1320 if (res == RCode::NoError && dc->d_followCNAMERecords) {
1321 res = followCNAMERecords(ret, QType(dc->d_mdp.d_qtype));
1322 }
1323 goto haveAnswer;
1324 }
1325
1326 if (t_pdl) {
1327 t_pdl->prerpz(dq, res);
1328 }
1329
1330 // Check if the query has a policy attached to it
1331 if (wantsRPZ && (appliedPolicy.d_type == DNSFilterEngine::PolicyType::None || appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction)) {
1332 luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies, appliedPolicy);
1333 }
1334
1335 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1336 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
1337
1338 sr.setWantsRPZ(wantsRPZ);
1339 if(wantsRPZ) {
1340 switch(appliedPolicy.d_kind) {
1341 case DNSFilterEngine::PolicyKind::NoAction:
1342 break;
1343 case DNSFilterEngine::PolicyKind::Drop:
1344 g_stats.policyDrops++;
1345 g_stats.policyResults[appliedPolicy.d_kind]++;
1346 return;
1347 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1348 g_stats.policyResults[appliedPolicy.d_kind]++;
1349 res=RCode::NXDomain;
1350 goto haveAnswer;
1351 case DNSFilterEngine::PolicyKind::NODATA:
1352 g_stats.policyResults[appliedPolicy.d_kind]++;
1353 res=RCode::NoError;
1354 goto haveAnswer;
1355 case DNSFilterEngine::PolicyKind::Custom:
1356 g_stats.policyResults[appliedPolicy.d_kind]++;
1357 res=RCode::NoError;
1358 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1359 for (const auto& dr : spoofed) {
1360 ret.push_back(dr);
1361 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1362 }
1363 goto haveAnswer;
1364 case DNSFilterEngine::PolicyKind::Truncate:
1365 if(!dc->d_tcp) {
1366 g_stats.policyResults[appliedPolicy.d_kind]++;
1367 res=RCode::NoError;
1368 pw.getHeader()->tc=1;
1369 goto haveAnswer;
1370 }
1371 break;
1372 }
1373 }
1374
1375 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
1376 try {
1377 sr.d_appliedPolicy = appliedPolicy;
1378 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
1379 shouldNotValidate = sr.wasOutOfBand();
1380 }
1381 catch(const ImmediateServFailException &e) {
1382 if(g_logCommonErrors) {
1383 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
1384 }
1385 res = RCode::ServFail;
1386 }
1387 catch(const PolicyHitException& e) {
1388 res = -2;
1389 }
1390 dq.validationState = sr.getValidationState();
1391 appliedPolicy = sr.d_appliedPolicy;
1392
1393 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1394 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1395 appliedPolicy = sr.d_appliedPolicy;
1396 g_stats.policyResults[appliedPolicy.d_kind]++;
1397 switch(appliedPolicy.d_kind) {
1398 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1399 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1400 case DNSFilterEngine::PolicyKind::Drop:
1401 g_stats.policyDrops++;
1402 return;
1403 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1404 ret.clear();
1405 res=RCode::NXDomain;
1406 goto haveAnswer;
1407
1408 case DNSFilterEngine::PolicyKind::NODATA:
1409 ret.clear();
1410 res=RCode::NoError;
1411 goto haveAnswer;
1412
1413 case DNSFilterEngine::PolicyKind::Truncate:
1414 if(!dc->d_tcp) {
1415 ret.clear();
1416 res=RCode::NoError;
1417 pw.getHeader()->tc=1;
1418 goto haveAnswer;
1419 }
1420 break;
1421
1422 case DNSFilterEngine::PolicyKind::Custom:
1423 ret.clear();
1424 res=RCode::NoError;
1425 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1426 for (const auto& dr : spoofed) {
1427 ret.push_back(dr);
1428 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1429 }
1430 goto haveAnswer;
1431 }
1432 }
1433
1434 if (wantsRPZ && (appliedPolicy.d_type == DNSFilterEngine::PolicyType::None || appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction)) {
1435 luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies, appliedPolicy);
1436 }
1437
1438 if(t_pdl) {
1439 if(res == RCode::NoError) {
1440 auto i=ret.cbegin();
1441 for(; i!= ret.cend(); ++i)
1442 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1443 break;
1444 if(i == ret.cend() && t_pdl->nodata(dq, res))
1445 shouldNotValidate = true;
1446
1447 }
1448 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
1449 shouldNotValidate = true;
1450
1451 if(t_pdl->postresolve(dq, res))
1452 shouldNotValidate = true;
1453 }
1454
1455 if (wantsRPZ) { //XXX This block is repeated, see above
1456 g_stats.policyResults[appliedPolicy.d_kind]++;
1457 switch(appliedPolicy.d_kind) {
1458 case DNSFilterEngine::PolicyKind::NoAction:
1459 break;
1460 case DNSFilterEngine::PolicyKind::Drop:
1461 g_stats.policyDrops++;
1462 return;
1463 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1464 ret.clear();
1465 res=RCode::NXDomain;
1466 goto haveAnswer;
1467
1468 case DNSFilterEngine::PolicyKind::NODATA:
1469 ret.clear();
1470 res=RCode::NoError;
1471 goto haveAnswer;
1472
1473 case DNSFilterEngine::PolicyKind::Truncate:
1474 if(!dc->d_tcp) {
1475 ret.clear();
1476 res=RCode::NoError;
1477 pw.getHeader()->tc=1;
1478 goto haveAnswer;
1479 }
1480 break;
1481
1482 case DNSFilterEngine::PolicyKind::Custom:
1483 ret.clear();
1484 res=RCode::NoError;
1485 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1486 for (const auto& dr : spoofed) {
1487 ret.push_back(dr);
1488 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1489 }
1490 goto haveAnswer;
1491 }
1492 }
1493 }
// haveAnswer: reached by falling out of the block above or through one of the
// gotos; res holds the result to act on and ret the records to put in the packet.
1494 haveAnswer:;
1495 if(res == PolicyDecision::DROP) {
1496 g_stats.policyDrops++;
1497 return;
1498 }
1499 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1500 {
1501 string trace(sr.getTrace());
1502 if(!trace.empty()) {
1503 vector<string> lines;
1504 boost::split(lines, trace, boost::is_any_of("\n"));
1505 for(const string& line : lines) {
1506 if(!line.empty())
1507 g_log<<Logger::Warning<< line << endl;
1508 }
1509 }
1510 }
1511
1512 if(res == -1) {
1513 pw.getHeader()->rcode=RCode::ServFail;
1514 // no commit here, because no record
1515 g_stats.servFails++;
1516 }
1517 else {
1518 pw.getHeader()->rcode=res;
1519
1520 // Does the validation mode or query demand validation?
1521 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1522 try {
1523 if(sr.doLog()) {
1524 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
1525 }
1526
1527 auto state = sr.getValidationState();
1528
1529 if(state == Secure) {
1530 if(sr.doLog()) {
1531 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
1532 }
1533
1534 // Is the query source interested in the value of the ad-bit?
1535 if (dc->d_mdp.d_header.ad || DNSSECOK)
1536 pw.getHeader()->ad=1;
1537 }
1538 else if(state == Insecure) {
1539 if(sr.doLog()) {
1540 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
1541 }
1542
1543 pw.getHeader()->ad=0;
1544 }
1545 else if(state == Bogus) {
1546 if(t_bogusremotes)
1547 t_bogusremotes->push_back(dc->d_source);
1548 if(t_bogusqueryring)
1549 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1550 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1551 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
1552 }
1553
1554 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1555 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1556 if(sr.doLog()) {
1557 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1558 }
1559
1560 pw.getHeader()->rcode=RCode::ServFail;
1561 goto sendit;
1562 } else {
1563 if(sr.doLog()) {
1564 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1565 }
1566 }
1567 }
1568 }
1569 catch(const ImmediateServFailException &e) {
1570 if(g_logCommonErrors)
1571 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1572 pw.getHeader()->rcode=RCode::ServFail;
1573 goto sendit;
1574 }
1575 }
1576
1577 if(ret.size()) {
1578 orderAndShuffle(ret);
1579 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
1580 stable_sort(ret.begin(), ret.end(), *sl);
1581 variableAnswer=true;
1582 }
1583 }
1584
// Copy the records into the packet. When the client did not set DO, NSEC3
// records are skipped, as are RRSIG/NSEC records that were not asked for
// directly or that sit outside the answer section. If addRecordToPacket()
// refuses a record the loop stops and needCommit is cleared, so the
// pw.commit() after the loop is skipped.
1585 bool needCommit = false;
1586 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1587 if( ! DNSSECOK &&
1588 ( i->d_type == QType::NSEC3 ||
1589 (
1590 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1591 (
1592 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1593 i->d_place != DNSResourceRecord::ANSWER
1594 )
1595 )
1596 )
1597 ) {
1598 continue;
1599 }
1600
1601 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1602 needCommit = false;
1603 break;
1604 }
1605 needCommit = true;
1606
1607 #ifdef NOD_ENABLED
1608 bool udr = false;
1609 if (g_udrEnabled) {
1610 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
1611 if (!hasUDR && udr)
1612 hasUDR = true;
1613 }
1614 #endif /* NOD ENABLED */
1615
1616 #ifdef HAVE_PROTOBUF
1617 if (t_protobufServers) {
1618 #ifdef NOD_ENABLED
1619 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1620 #else
1621 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
1622 #endif /* NOD_ENABLED */
1623 }
1624 #endif
1625 }
1626 if(needCommit)
1627 pw.commit();
1628 }
// sendit: also the jump target for the paths that skip writing records (EDNS
// version error, ANY over UDP with g_anyToTcp, SERVFAIL decided during validation).
1629 sendit:;
1630
1631 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
1632 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
1633 EDNSSubnetOpts eo;
1634 eo.source = dc->d_ednssubnet.source;
1635 ComboAddress sa;
1636 sa.reset();
1637 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1638 eo.scope = Netmask(sa, 0);
1639
1640 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1641 }
1642
1643 if (haveEDNS) {
1644 /* we try to add the EDNS OPT RR even for truncated answers,
1645 as rfc6891 states:
1646 "The minimal response MUST be the DNS header, question section, and an
1647 OPT record. This MUST also occur when a truncated response (using
1648 the DNS header's TC bit) is returned."
1649 */
1650 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1651 pw.commit();
1652 }
1653
1654 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1655 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1656 #ifdef NOD_ENABLED
1657 bool nod = false;
1658 if (g_nodEnabled) {
1659 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1660 nod = true;
1661 }
1662 #endif /* NOD_ENABLED */
1663 #ifdef HAVE_PROTOBUF
1664 if (t_protobufServers && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
1665 pbMessage->setBytes(packet.size());
1666 pbMessage->setResponseCode(pw.getHeader()->rcode);
1667 if (appliedPolicy.d_name) {
1668 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1669 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
1670 }
1671 pbMessage->setPolicyTags(dc->d_policyTags);
1672 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1673 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1674 }
1675 else {
1676 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1677 }
1678 pbMessage->setRequestorId(dq.requestorId);
1679 pbMessage->setDeviceId(dq.deviceId);
1680 pbMessage->setDeviceName(dq.deviceName);
1681 #ifdef NOD_ENABLED
1682 if (g_nodEnabled) {
1683 if (nod) {
1684 pbMessage->setNOD(true);
1685 pbMessage->addPolicyTag(g_nod_pbtag);
1686 }
1687 if (hasUDR) {
1688 pbMessage->addPolicyTag(g_udr_pbtag);
1689 }
1690 }
1691 #endif /* NOD_ENABLED */
1692 if (dc->d_logResponse) {
1693 protobufLogResponse(*pbMessage);
1694 }
1695 #ifdef NOD_ENABLED
1696 if (g_nodEnabled) {
1697 pbMessage->setNOD(false);
1698 pbMessage->clearUDR();
1699 if (nod)
1700 pbMessage->removePolicyTag(g_nod_pbtag);
1701 if (hasUDR)
1702 pbMessage->removePolicyTag(g_udr_pbtag);
1703 }
1704 #endif /* NOD_ENABLED */
1705 }
1706 #endif
// UDP: send the datagram; when the answer is not variable and the packet cache
// is not disabled, the response is also inserted into the packet cache.
1707 if(!dc->d_tcp) {
1708 struct msghdr msgh;
1709 struct iovec iov;
1710 cmsgbuf_aligned cbuf;
1711 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1712 msgh.msg_control=NULL;
1713
1714 if(g_fromtosockets.count(dc->d_socket)) {
1715 addCMsgSrcAddr(&msgh, &cbuf, &dc->d_local, 0);
1716 }
1717 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors) {
1718 int err = errno;
1719 g_log << Logger::Warning << "Sending UDP reply to client " << dc->getRemote() << " failed with: "
1720 << strerror(err) << endl;
1721 }
1722
1723 if(variableAnswer || sr.wasVariable()) {
1724 g_stats.variableResponses++;
1725 }
1726 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1727 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1728 string((const char*)&*packet.begin(), packet.size()),
1729 g_now.tv_sec,
1730 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1731 min(minTTL,SyncRes::s_packetcachettl),
1732 dq.validationState,
1733 dc->d_ecsBegin,
1734 dc->d_ecsEnd,
1735 std::move(pbMessage));
1736 }
1737 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1738 }
// TCP: write a two-byte length (high byte first) followed by the packet, then
// update the connection's in-flight/query counters and its read registration.
1739 else {
1740 char buf[2];
1741 buf[0]=packet.size()/256;
1742 buf[1]=packet.size()%256;
1743
1744 Utility::iovec iov[2];
1745
1746 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1747 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1748
1749 int wret=Utility::writev(dc->d_socket, iov, 2);
1750 bool hadError=true;
1751
1752 if(wret == 0)
1753 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1754 else if(wret < 0 ) {
1755 int err = errno;
1756 g_log << Logger::Error << "Error writing TCP answer to " << dc->getRemote() << ": " << strerror(err) << endl;
1757 } else if((unsigned int)wret != 2 + packet.size())
1758 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1759 else
1760 hadError=false;
1761
1762 // update tcp connection status, closing if needed and doing the fd multiplexer accounting
1763 if (dc->d_tcpConnection->d_requestsInFlight > 0) {
1764 dc->d_tcpConnection->d_requestsInFlight--;
1765 }
1766
1767 // In the code below, we try to remove the fd from the set, but
1768 // we don't know if another mthread already did the remove, so we can get a
1769 // "Tried to remove unlisted fd" exception. Not that an inflight < limit test
1770 // will not work since we do not know if the other mthread got an error or not.
1771 if(hadError) {
1772 try {
1773 t_fdm->removeReadFD(dc->d_socket);
1774 }
1775 catch (FDMultiplexerException &) {
1776 }
1777 dc->d_socket = -1;
1778 }
1779 else {
1780 dc->d_tcpConnection->queriesCount++;
1781 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1782 try {
1783 t_fdm->removeReadFD(dc->d_socket);
1784 }
1785 catch (FDMultiplexerException &) {
1786 }
1787 dc->d_socket = -1;
1788 }
1789 else {
1790 Utility::gettimeofday(&g_now, 0); // needs to be updated
1791 struct timeval ttd = g_now;
1792 // If we cross from max to max-1 in flight requests, the fd was not listened to, add it back
1793 if (dc->d_tcpConnection->d_requestsInFlight == TCPConnection::s_maxInFlight - 1) {
1794 // A read error might have happened. If we add the fd back, it will most likely error again.
1795 // This is not a big issue, the next handleTCPClientReadable() will see another read error
1796 // and take action.
1797 ttd.tv_sec += g_tcpTimeout;
1798 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
1799 } else {
1800 // fd might have been removed by read error code, so expect an exception
1801 try {
1802 t_fdm->setReadTTD(dc->d_socket, ttd, g_tcpTimeout);
1803 }
1804 catch (FDMultiplexerException &) {
1805 }
1806 }
1807 }
1808 }
1809 }
// Bookkeeping: log the answer (unless g_quiet) and update the cache hit/miss,
// latency and "our time" counters. spent is in seconds.
1810 float spent=makeFloat(sr.getNow()-dc->d_now);
1811 if(!g_quiet) {
1812 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1813 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1814 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1815 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1816
1817 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1818 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
1819 }
1820
1821 g_log<<endl;
1822
1823 }
1824
1825 if (sr.d_outqueries || sr.d_authzonequeries) {
1826 s_RC->cacheMisses++;
1827 }
1828 else {
1829 s_RC->cacheHits++;
1830 }
1831
1832 if(spent < 0.001)
1833 g_stats.answers0_1++;
1834 else if(spent < 0.010)
1835 g_stats.answers1_10++;
1836 else if(spent < 0.1)
1837 g_stats.answers10_100++;
1838 else if(spent < 1.0)
1839 g_stats.answers100_1000++;
1840 else
1841 g_stats.answersSlow++;
1842
1843 uint64_t newLat=(uint64_t)(spent*1000000);
1844 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1845 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1846 // no worries, we do this for packet cache hits elsewhere
1847
1848 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1849 if(ourtime < 1)
1850 g_stats.ourtime0_1++;
1851 else if(ourtime < 2)
1852 g_stats.ourtime1_2++;
1853 else if(ourtime < 4)
1854 g_stats.ourtime2_4++;
1855 else if(ourtime < 8)
1856 g_stats.ourtime4_8++;
1857 else if(ourtime < 16)
1858 g_stats.ourtime8_16++;
1859 else if(ourtime < 32)
1860 g_stats.ourtime16_32++;
1861 else {
1862 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1863 g_stats.ourtimeSlow++;
1864 }
1865 if(ourtime >= 0.0) {
1866 newLat=ourtime*1000; // usec
1867 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1868 }
1869 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1870 }
1871 catch(PDNSException &ae) {
1872 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1873 }
1874 catch(const MOADNSException &mde) {
1875 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
1876 }
1877 catch(std::exception& e) {
1878 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1879
1880 // Luawrapper nests the exception from Lua, so we unnest it here
1881 try {
1882 std::rethrow_if_nested(e);
1883 } catch(const std::exception& ne) {
1884 g_log<<". Extra info: "<<ne.what();
1885 } catch(...) {}
1886
1887 g_log<<endl;
1888 }
1889 catch(...) {
1890 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1891 }
1892
// Reached after normal completion or after a caught exception; the early
// returns inside the try block (policy drops) do not pass through here.
1893 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1894 }
1895
1896 static void makeControlChannelSocket(int processNum=-1)
1897 {
1898 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1899 if(processNum >= 0)
1900 sockname += "."+std::to_string(processNum);
1901 sockname+=".controlsocket";
1902 s_rcc.listen(sockname);
1903
1904 int sockowner = -1;
1905 int sockgroup = -1;
1906
1907 if (!::arg().isEmpty("socket-group"))
1908 sockgroup=::arg().asGid("socket-group");
1909 if (!::arg().isEmpty("socket-owner"))
1910 sockowner=::arg().asUid("socket-owner");
1911
1912 if (sockgroup > -1 || sockowner > -1) {
1913 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1914 unixDie("Failed to chown control socket");
1915 }
1916 }
1917
1918 // do mode change if socket-mode is given
1919 if(!::arg().isEmpty("socket-mode")) {
1920 mode_t sockmode=::arg().asMode("socket-mode");
1921 if(chmod(sockname.c_str(), sockmode) < 0) {
1922 unixDie("Failed to chmod control socket");
1923 }
1924 }
1925 }
1926
/* Extracts the query name, type and class from a raw DNS query, then scans the records located
   right after the question for an EDNS Client Subnet (ECS) option and/or an XPF record.

   question:   the raw query packet.
   dnsname, qtype, qclass: outputs, filled from the question section.
   foundECS:   set to true when an ECS option was successfully parsed; never reset here.
   ednssubnet: receives the parsed ECS option; ECS is not looked for when this is null.
   options:    when not null, all EDNS options of the OPT record are collected into it and ECS
               is taken from there; when null only the ECS option is searched for.
   foundXPF:   receives the result of parseXPFPayload() when a matching XPF record is seen.
   xpfSource, xpfDest: passed to parseXPFPayload(); XPF is only looked for when xpfSource is not
               null and g_xpfRRCode is not zero.

   Out-of-range accesses through std::string::at() throw std::out_of_range; the DNSName
   constructor presumably throws on a malformed name as well -- callers in this file wrap the
   call in a try/catch block.
   NOTE(review): the scan starts immediately after the question, which looks like it assumes that
   the answer and authority sections of the query are empty -- confirm. */
1927 static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1928 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
1929 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
1930 {
1931 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
1932 const bool lookForECS = ednssubnet != nullptr;
1933 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
1934 size_t questionLen = question.length();
1935 unsigned int consumed=0;
1936 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1937
// presumably 'consumed' is the length of the encoded qname and the extra 4 bytes are qtype + qclass -- TODO confirm
1938 size_t pos= sizeof(dnsheader)+consumed+4;
1939 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1940 const uint16_t arcount = ntohs(dh->arcount);
1941
// walk at most 'arcount' records, stopping as soon as everything we are looking for has been found
// or there is not enough data left for a root label plus a record header
1942 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1943 if (question.at(pos) != 0) {
1944 /* not an OPT or a XPF, bye. */
1945 return;
1946 }
1947
1948 pos += 1;
1949 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1950 pos += sizeof(dnsrecordheader);
1951
// 'pos' now points right after the record header, at the start of the record data
1952 if (pos >= questionLen) {
1953 return;
1954 }
1955
1956 /* OPT root label (1) followed by type (2) */
1957 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
1958 if (!options) {
// the caller does not want the full option map: look for the ECS option only
1959 char* ecsStart = nullptr;
1960 size_t ecsLen = 0;
1961 /* we need to pass the record len */
1962 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
// presumably the first 4 bytes at ecsStart are the option code and option length -- TODO confirm
1963 if (res == 0 && ecsLen > 4) {
1964 EDNSSubnetOpts eso;
1965 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1966 *ednssubnet=eso;
1967 foundECS = true;
1968 }
1969 }
1970 }
1971 else {
1972 /* we need to pass the record len */
1973 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
1974 if (res == 0) {
// only the first ECS value present in the option map is considered
1975 const auto& it = options->find(EDNSOptionCode::ECS);
1976 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
1977 EDNSSubnetOpts eso;
1978 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
1979 *ednssubnet=eso;
1980 foundECS = true;
1981 }
1982 }
1983 }
1984 }
1985 }
1986 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
// the record data must fit entirely in what is left of the packet
1987 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1988 return;
1989 }
1990
1991 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1992 }
1993
// skip the record data to reach the next record
1994 pos += ntohs(drh->d_clen);
1995 }
1996 }
1997
1998 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1999 {
2000 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
2001
2002 if(conn->state==TCPConnection::BYTE0) {
2003 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
2004 if(bytes==1)
2005 conn->state=TCPConnection::BYTE1;
2006 if(bytes==2) {
2007 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2008 conn->data.resize(conn->qlen);
2009 conn->bytesread=0;
2010 conn->state=TCPConnection::GETQUESTION;
2011 }
2012 if(!bytes || bytes < 0) {
2013 t_fdm->removeReadFD(fd);
2014 return;
2015 }
2016 }
2017 else if(conn->state==TCPConnection::BYTE1) {
2018 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
2019 if(bytes==1) {
2020 conn->state=TCPConnection::GETQUESTION;
2021 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2022 conn->data.resize(conn->qlen);
2023 conn->bytesread=0;
2024 }
2025 if(!bytes || bytes < 0) {
2026 if(g_logCommonErrors)
2027 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
2028 t_fdm->removeReadFD(fd);
2029 return;
2030 }
2031 }
2032 else if(conn->state==TCPConnection::GETQUESTION) {
2033 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
2034 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
2035 if(g_logCommonErrors) {
2036 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
2037 }
2038 t_fdm->removeReadFD(fd);
2039 return;
2040 }
2041 conn->bytesread+=(uint16_t)bytes;
2042 if(conn->bytesread==conn->qlen) {
2043 conn->state = TCPConnection::BYTE0;
2044 std::unique_ptr<DNSComboWriter> dc;
2045 try {
2046 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
2047 }
2048 catch(const MOADNSException &mde) {
2049 g_stats.clientParseError++;
2050 if(g_logCommonErrors)
2051 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
2052 return;
2053 }
2054 dc->d_tcpConnection = conn; // carry the torch
2055 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
2056 dc->d_tcp=true;
2057 dc->setRemote(conn->d_remote);
2058 dc->setSource(conn->d_remote);
2059 ComboAddress dest;
2060 dest.reset();
2061 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
2062 socklen_t len = dest.getSocklen();
2063 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
2064 dc->setLocal(dest);
2065 dc->setDestination(dest);
2066 DNSName qname;
2067 uint16_t qtype=0;
2068 uint16_t qclass=0;
2069 bool needECS = false;
2070 bool needXPF = g_XPFAcl.match(conn->d_remote);
2071 string requestorId;
2072 string deviceId;
2073 string deviceName;
2074 bool logQuery = false;
2075 #ifdef HAVE_PROTOBUF
2076 auto luaconfsLocal = g_luaconfs.getLocal();
2077 if (checkProtobufExport(luaconfsLocal)) {
2078 needECS = true;
2079 }
2080 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2081 dc->d_logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2082 #endif /* HAVE_PROTOBUF */
2083
2084 #ifdef HAVE_FSTRM
2085 checkFrameStreamExport(luaconfsLocal);
2086 #endif
2087
2088 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
2089
2090 try {
2091 EDNSOptionViewMap ednsOptions;
2092 bool xpfFound = false;
2093 dc->d_ecsParsed = true;
2094 dc->d_ecsFound = false;
2095 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
2096 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2097 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
2098
2099 if(t_pdl) {
2100 try {
2101 if (t_pdl->d_gettag_ffi) {
2102 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_records, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName, dc->d_rcode, dc->d_ttlCap, dc->d_variable, logQuery, dc->d_logResponse, dc->d_followCNAMERecords);
2103 }
2104 else if (t_pdl->d_gettag) {
2105 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName);
2106 }
2107 }
2108 catch(const std::exception& e) {
2109 if(g_logCommonErrors)
2110 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2111 }
2112 }
2113 }
2114 catch(const std::exception& e)
2115 {
2116 if(g_logCommonErrors)
2117 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2118 }
2119 }
2120
2121 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2122
2123 #ifdef HAVE_PROTOBUF
2124 if(t_protobufServers || t_outgoingProtobufServers) {
2125 dc->d_requestorId = requestorId;
2126 dc->d_deviceId = deviceId;
2127 dc->d_deviceName = deviceName;
2128 dc->d_uuid = getUniqueID();
2129 }
2130
2131 if(t_protobufServers) {
2132 try {
2133
2134 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
2135 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId, dc->d_deviceName);
2136 }
2137 }
2138 catch(std::exception& e) {
2139 if(g_logCommonErrors)
2140 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
2141 }
2142 }
2143 #endif
2144 if(t_pdl) {
2145 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2146 if(!g_quiet)
2147 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2148 g_stats.policyDrops++;
2149 return;
2150 }
2151 }
2152
2153 if(dc->d_mdp.d_header.qr) {
2154 g_stats.ignoredCount++;
2155 if(g_logCommonErrors) {
2156 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2157 }
2158 return;
2159 }
2160 if(dc->d_mdp.d_header.opcode) {
2161 g_stats.ignoredCount++;
2162 if(g_logCommonErrors) {
2163 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2164 }
2165 return;
2166 }
2167 else if (dh->qdcount == 0) {
2168 g_stats.emptyQueriesCount++;
2169 if(g_logCommonErrors) {
2170 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2171 }
2172 return;
2173 }
2174 else {
2175 ++g_stats.qcounter;
2176 ++g_stats.tcpqcounter;
2177 ++conn->d_requestsInFlight;
2178 if (conn->d_requestsInFlight >= TCPConnection::s_maxInFlight) {
2179 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
2180 } else {
2181 Utility::gettimeofday(&g_now, 0); // needed?
2182 struct timeval ttd = g_now;
2183 t_fdm->setReadTTD(fd, ttd, g_tcpTimeout);
2184 }
2185 MT->makeThread(startDoResolve, dc.release()); // deletes dc
2186 return;
2187 }
2188 }
2189 }
2190 }
2191
2192 //! Handle new incoming TCP connection
2193 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
2194 {
2195 ComboAddress addr;
2196 socklen_t addrlen=sizeof(addr);
2197 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
2198 if(newsock>=0) {
2199 if(MT->numProcesses() > g_maxMThreads) {
2200 g_stats.overCapacityDrops++;
2201 try {
2202 closesocket(newsock);
2203 }
2204 catch(const PDNSException& e) {
2205 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
2206 }
2207 return;
2208 }
2209
2210 if(t_remotes)
2211 t_remotes->push_back(addr);
2212 if(t_allowFrom && !t_allowFrom->match(&addr)) {
2213 if(!g_quiet)
2214 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2215
2216 g_stats.unauthorizedTCP++;
2217 try {
2218 closesocket(newsock);
2219 }
2220 catch(const PDNSException& e) {
2221 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
2222 }
2223 return;
2224 }
2225 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
2226 g_stats.tcpClientOverflow++;
2227 try {
2228 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2229 }
2230 catch(const PDNSException& e) {
2231 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
2232 }
2233 return;
2234 }
2235
2236 setNonBlocking(newsock);
2237 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
2238 tc->state=TCPConnection::BYTE0;
2239
2240 struct timeval ttd;
2241 Utility::gettimeofday(&ttd, 0);
2242 ttd.tv_sec += g_tcpTimeout;
2243
2244 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
2245 }
2246 }
2247
/* Handles a single query received over UDP.

   question: the raw query packet.
   fromaddr: the address the packet was received from.
   destaddr: the local address the packet was sent to.
   tv:       timestamp of the packet; only used when tv.tv_sec is not zero.
   fd:       the socket the query was received on, also used to send a packet cache hit.

   The query is dropped when its timestamp is more than 1000 ms old. Otherwise the query name,
   ECS and XPF information are parsed when needed and the gettag hooks are called, then the
   packet cache is consulted. On a cache hit the cached response is sent right away. On a miss
   the query is handed to a new mthread running startDoResolve, unless the t_pdl ipfilter hook
   rejects it or more than g_maxMThreads mthreads are already running.
   Every path returns a null pointer. */
2248 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
2249 {
2250 gettimeofday(&g_now, 0);
// a zero tv_sec means that no timestamp is available, the age check is skipped in that case
2251 if (tv.tv_sec) {
2252 struct timeval diff = g_now - tv;
// time elapsed since the packet timestamp, in milliseconds
2253 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
2254
2255 if(delta > 1000.0) {
2256 g_stats.tooOldDrops++;
2257 return nullptr;
2258 }
2259 }
2260
2261 ++g_stats.qcounter;
2262 if(fromaddr.sin4.sin_family==AF_INET6)
2263 g_stats.ipv6qcounter++;
2264
2265 string response;
2266 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
2267 unsigned int ctag=0;
2268 uint32_t qhash = 0;
2269 bool needECS = false;
2270 bool needXPF = g_XPFAcl.match(fromaddr);
2271 std::vector<std::string> policyTags;
2272 LuaContext::LuaObject data;
// 'source' and 'destination' start as the addresses seen on the wire and are the ones handed to
// getQNameAndSubnet() as XPF outputs when needXPF is set
2273 ComboAddress source = fromaddr;
2274 ComboAddress destination = destaddr;
2275 string requestorId;
2276 string deviceId;
2277 string deviceName;
2278 bool logQuery = false;
2279 bool logResponse = false;
2280 #ifdef HAVE_PROTOBUF
2281 boost::uuids::uuid uniqueId;
2282 auto luaconfsLocal = g_luaconfs.getLocal();
2283 if (checkProtobufExport(luaconfsLocal)) {
2284 uniqueId = getUniqueID();
2285 needECS = true;
2286 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
2287 uniqueId = getUniqueID();
2288 }
2289 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2290 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2291 #endif
2292 #ifdef HAVE_FSTRM
2293 checkFrameStreamExport(luaconfsLocal);
2294 #endif
2295 EDNSSubnetOpts ednssubnet;
2296 bool ecsFound = false;
2297 bool ecsParsed = false;
2298 uint16_t ecsBegin = 0;
2299 uint16_t ecsEnd = 0;
2300 std::vector<DNSRecord> records;
2301 boost::optional<int> rcode = boost::none;
2302 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2303 bool variable = false;
2304 bool followCNAMEs = false;
2305 try {
2306 DNSName qname;
2307 uint16_t qtype=0;
2308 uint16_t qclass=0;
2309 uint32_t age;
2310 bool qnameParsed=false;
2311 #ifdef MALLOC_TRACE
2312 /*
2313 static uint64_t last=0;
2314 if(!last)
2315 g_mtracer->clearAllocators();
2316 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2317 last=g_mtracer->getAllocs();
2318 cout<<g_mtracer->topAllocatorsString()<<endl;
2319 g_mtracer->clearAllocators();
2320 */
2321 #endif
2322
// the question is only parsed here when ECS, XPF or one of the gettag hooks needs it
2323 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
2324 try {
2325 EDNSOptionViewMap ednsOptions;
2326 bool xpfFound = false;
2327
2328 ecsFound = false;
2329
2330 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2331 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2332 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2333
// only reached when getQNameAndSubnet() did not throw
2334 qnameParsed = true;
2335 ecsParsed = true;
2336
2337 if(t_pdl) {
2338 try {
// gettag_ffi takes precedence over gettag when both are set
2339 if (t_pdl->d_gettag_ffi) {
2340 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, records, data, ednsOptions, false, requestorId, deviceId, deviceName, rcode, ttlCap, variable, logQuery, logResponse, followCNAMEs);
2341 }
2342 else if (t_pdl->d_gettag) {
2343 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName);
2344 }
2345 }
2346 catch(const std::exception& e) {
2347 if(g_logCommonErrors)
2348 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2349 }
2350 }
2351 }
2352 catch(const std::exception& e)
2353 {
2354 if(g_logCommonErrors)
2355 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2356 }
2357 }
2358
2359 bool cacheHit = false;
2360 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
2361 #ifdef HAVE_PROTOBUF
2362 if (t_protobufServers) {
2363 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
2364 pbMessage->setServerIdentity(SyncRes::s_serverID);
2365 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
2366 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName);
2367 }
2368 }
2369 #endif /* HAVE_PROTOBUF */
2370
2371 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2372 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2373 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2374 vState valState;
// when the question was already parsed above qtype and qclass are passed by value, otherwise they
// are passed as pointers, presumably so that the packet cache can fill them in -- TODO confirm
2375 if (qnameParsed) {
2376 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2377 }
2378 else {
2379 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2380 }
2381
2382 if (cacheHit) {
2383 if(valState == Bogus) {
2384 if(t_bogusremotes)
2385 t_bogusremotes->push_back(source);
2386 if(t_bogusqueryring)
2387 t_bogusqueryring->push_back(make_pair(qname, qtype));
2388 }
2389
2390 #ifdef HAVE_PROTOBUF
2391 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
2392 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2393 ComboAddress requestor = requestorNM.getMaskedNetwork();
2394 requestor.setPort(source.getPort());
2395 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2396 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2397 if (g_useKernelTimestamp && tv.tv_sec) {
2398 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2399 }
2400 else {
2401 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2402 }
2403 pbMessage->setRequestorId(requestorId);
2404 pbMessage->setDeviceId(deviceId);
2405 pbMessage->setDeviceName(deviceName);
2406 protobufLogResponse(*pbMessage);
2407 }
2408 #endif /* HAVE_PROTOBUF */
2409 if(!g_quiet)
2410 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
2411
2412 g_stats.packetCacheHits++;
2413 SyncRes::s_queries++;
2414 ageDNSPacket(response, age);
2415 struct msghdr msgh;
2416 struct iovec iov;
2417 cmsgbuf_aligned cbuf;
// the reply goes back to the address the packet came from (fromaddr), not to 'source'
2418 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2419 msgh.msg_control=NULL;
2420
// for sockets listed in g_fromtosockets the destination address of the query is passed along
2421 if(g_fromtosockets.count(fd)) {
2422 addCMsgSrcAddr(&msgh, &cbuf, &destaddr, 0);
2423 }
2424 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors) {
2425 int err = errno;
2426 g_log << Logger::Warning << "Sending UDP reply to client " << source.toStringWithPort()
2427 << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " failed with: "
2428 << strerror(err) << endl;
2429 }
2430 if(response.length() >= sizeof(struct dnsheader)) {
2431 struct dnsheader tmpdh;
2432 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
2433 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
2434 }
2435 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
2436 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
2437 return 0;
2438 }
2439 }
2440 catch(std::exception& e) {
2441 if(g_logCommonErrors)
2442 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
2443 return 0;
2444 }
2445
// packet cache miss: decide whether the query may be resolved at all
2446 if(t_pdl) {
2447 if(t_pdl->ipfilter(source, destination, *dh)) {
2448 if(!g_quiet)
2449 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2450 g_stats.policyDrops++;
2451 return 0;
2452 }
2453 }
2454
2455 if(MT->numProcesses() > g_maxMThreads) {
2456 if(!g_quiet)
2457 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
2458
2459 g_stats.overCapacityDrops++;
2460 return 0;
2461 }
2462
// everything gathered so far is stored in the DNSComboWriter handed to the resolving mthread
2463 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data), std::move(records)));
2464 dc->setSocket(fd);
2465 dc->d_tag=ctag;
2466 dc->d_qhash=qhash;
2467 dc->setRemote(fromaddr);
2468 dc->setSource(source);
2469 dc->setLocal(destaddr);
2470 dc->setDestination(destination);
2471 dc->d_tcp=false;
2472 dc->d_ecsFound = ecsFound;
2473 dc->d_ecsParsed = ecsParsed;
2474 dc->d_ecsBegin = ecsBegin;
2475 dc->d_ecsEnd = ecsEnd;
2476 dc->d_ednssubnet = ednssubnet;
2477 dc->d_ttlCap = ttlCap;
2478 dc->d_variable = variable;
2479 dc->d_followCNAMERecords = followCNAMEs;
2480 dc->d_rcode = rcode;
2481 dc->d_logResponse = logResponse;
2482 #ifdef HAVE_PROTOBUF
2483 if (t_protobufServers || t_outgoingProtobufServers) {
2484 dc->d_uuid = std::move(uniqueId);
2485 }
2486 dc->d_requestorId = requestorId;
2487 dc->d_deviceId = deviceId;
2488 dc->d_deviceName = deviceName;
2489 dc->d_kernelTimestamp = tv;
2490 #endif
2491
2492 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
2493 return 0;
2494 }
2495
2496
2497 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
2498 {
2499 ssize_t len;
2500 static const size_t maxIncomingQuerySize = 512;
2501 static thread_local std::string data;
2502 ComboAddress fromaddr;
2503 struct msghdr msgh;
2504 struct iovec iov;
2505 cmsgbuf_aligned cbuf;
2506 bool firstQuery = true;
2507
2508 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2509 data.resize(maxIncomingQuerySize);
2510 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2511 fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
2512
2513 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
2514
2515 firstQuery = false;
2516
2517 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2518 g_stats.ignoredCount++;
2519 if (!g_quiet) {
2520 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2521 }
2522 return;
2523 }
2524
2525 if (msgh.msg_flags & MSG_TRUNC) {
2526 g_stats.truncatedDrops++;
2527 if (!g_quiet) {
2528 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2529 }
2530 return;
2531 }
2532
2533 if(t_remotes) {
2534 t_remotes->push_back(fromaddr);
2535 }
2536
2537 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2538 if(!g_quiet) {
2539 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2540 }
2541
2542 g_stats.unauthorizedUDP++;
2543 return;
2544 }
2545 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2546 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2547 if(!g_quiet) {
2548 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2549 }
2550
2551 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2552 return;
2553 }
2554
2555 try {
2556 data.resize(static_cast<size_t>(len));
2557 dnsheader* dh=(dnsheader*)&data[0];
2558
2559 if(dh->qr) {
2560 g_stats.ignoredCount++;
2561 if(g_logCommonErrors) {
2562 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2563 }
2564 }
2565 else if(dh->opcode) {
2566 g_stats.ignoredCount++;
2567 if(g_logCommonErrors) {
2568 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2569 }
2570 }
2571 else if (dh->qdcount == 0) {
2572 g_stats.emptyQueriesCount++;
2573 if(g_logCommonErrors) {
2574 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2575 }
2576 }
2577 else {
2578 struct timeval tv={0,0};
2579 HarvestTimestamp(&msgh, &tv);
2580 ComboAddress dest;
2581 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2582 auto loc = rplookup(g_listenSocketsAddresses, fd);
2583 if(HarvestDestinationAddress(&msgh, &dest)) {
2584 // but.. need to get port too
2585 if(loc) {
2586 dest.sin4.sin_port = loc->sin4.sin_port;
2587 }
2588 }
2589 else {
2590 if(loc) {
2591 dest = *loc;
2592 }
2593 else {
2594 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2595 socklen_t slen = dest.getSocklen();
2596 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2597 }
2598 }
2599
2600 if(g_weDistributeQueries) {
2601 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2602 }
2603 else {
2604 ++s_threadInfos[t_id].numberOfDistributedQueries;
2605 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
2606 }
2607 }
2608 }
2609 catch(const MOADNSException &mde) {
2610 g_stats.clientParseError++;
2611 if(g_logCommonErrors) {
2612 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2613 }
2614 }
2615 catch(const std::runtime_error& e) {
2616 g_stats.clientParseError++;
2617 if(g_logCommonErrors) {
2618 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2619 }
2620 }
2621 }
2622 else {
2623 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2624 if(firstQuery && errno == EAGAIN) {
2625 g_stats.noPacketError++;
2626 }
2627
2628 break;
2629 }
2630 }
2631 }
2632
2633 static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
2634 {
2635 int fd;
2636 vector<string>locals;
2637 stringtok(locals,::arg()["local-address"]," ,");
2638
2639 if(locals.empty())
2640 throw PDNSException("No local address specified");
2641
2642 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2643 ServiceTuple st;
2644 st.port=::arg().asNum("local-port");
2645 parseService(*i, st);
2646
2647 ComboAddress sin;
2648
2649 sin.reset();
2650 sin.sin4.sin_family = AF_INET;
2651 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2652 sin.sin6.sin6_family = AF_INET6;
2653 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2654 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
2655 }
2656
2657 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
2658 if(fd<0)
2659 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2660
2661 setCloseOnExec(fd);
2662
2663 int tmp=1;
2664 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
2665 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
2666 exit(1);
2667 }
2668 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
2669 int err = errno;
2670 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
2671 }
2672
2673 #ifdef TCP_DEFER_ACCEPT
2674 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
2675 if(i==locals.begin())
2676 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
2677 }
2678 #endif
2679
2680 if( ::arg().mustDo("non-local-bind") )
2681 Utility::setBindAny(AF_INET, fd);
2682
2683 #ifdef SO_REUSEPORT
2684 if(g_reusePort) {
2685 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2686 throw PDNSException("SO_REUSEPORT: "+stringerror());
2687 }
2688 #endif
2689
2690 if (::arg().asNum("tcp-fast-open") > 0) {
2691 #ifdef TCP_FASTOPEN
2692 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2693 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
2694 int err = errno;
2695 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(err)<<endl;
2696 }
2697 #else
2698 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
2699 #endif
2700 }
2701
2702 sin.sin4.sin_port = htons(st.port);
2703 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
2704 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
2705 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
2706
2707 setNonBlocking(fd);
2708 setSocketSendBuffer(fd, 65000);
2709 listen(fd, 128);
2710 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
2711 tcpSockets.insert(fd);
2712
2713 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2714 // - fd is not that which we know here, but returned from accept()
2715 if(sin.sin4.sin_family == AF_INET)
2716 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
2717 else
2718 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2719 }
2720 }
2721
2722 static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
2723 {
2724 int one=1;
2725 vector<string>locals;
2726 stringtok(locals,::arg()["local-address"]," ,");
2727
2728 if(locals.empty())
2729 throw PDNSException("No local address specified");
2730
2731 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2732 ServiceTuple st;
2733 st.port=::arg().asNum("local-port");
2734 parseService(*i, st);
2735
2736 ComboAddress sin;
2737
2738 sin.reset();
2739 sin.sin4.sin_family = AF_INET;
2740 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2741 sin.sin6.sin6_family = AF_INET6;
2742 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2743 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2744 }
2745
2746 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2747 if(fd < 0) {
2748 throw PDNSException("Making a UDP server socket for resolver: "+stringerror());
2749 }
2750 if (!setSocketTimestamps(fd))
2751 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2752
2753 if(IsAnyAddress(sin)) {
2754 if(sin.sin4.sin_family == AF_INET)
2755 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2756 g_fromtosockets.insert(fd);
2757 #ifdef IPV6_RECVPKTINFO
2758 if(sin.sin4.sin_family == AF_INET6)
2759 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2760 g_fromtosockets.insert(fd);
2761 #endif
2762 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2763 int err = errno;
2764 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
2765 }
2766 }
2767 if( ::arg().mustDo("non-local-bind") )
2768 Utility::setBindAny(AF_INET6, fd);
2769
2770 setCloseOnExec(fd);
2771
2772 setSocketReceiveBuffer(fd, 250000);
2773 sin.sin4.sin_port = htons(st.port);
2774
2775
2776 #ifdef SO_REUSEPORT
2777 if(g_reusePort) {
2778 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2779 throw PDNSException("SO_REUSEPORT: "+stringerror());
2780 }
2781 #endif
2782
2783 if (sin.isIPv4()) {
2784 try {
2785 setSocketIgnorePMTU(fd);
2786 }
2787 catch(const std::exception& e) {
2788 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2789 }
2790 }
2791
2792 socklen_t socklen=sin.getSocklen();
2793 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2794 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2795
2796 setNonBlocking(fd);
2797
2798 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
2799 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2800 if(sin.sin4.sin_family == AF_INET)
2801 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2802 else
2803 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2804 }
2805 }
2806
2807 static void daemonize(void)
2808 {
2809 if(fork())
2810 exit(0); // bye bye
2811
2812 setsid();
2813
2814 int i=open("/dev/null",O_RDWR); /* open stdin */
2815 if(i < 0)
2816 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2817 else {
2818 dup2(i,0); /* stdin */
2819 dup2(i,1); /* stderr */
2820 dup2(i,2); /* stderr */
2821 close(i);
2822 }
2823 }
2824
2825 static void termIntHandler(int)
2826 {
2827 doExit();
2828 }
2829
2830 static void usr1Handler(int)
2831 {
2832 statsWanted=true;
2833 }
2834
2835 static void usr2Handler(int)
2836 {
2837 g_quiet= !g_quiet;
2838 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2839 ::arg().set("quiet")=g_quiet ? "" : "no";
2840 }
2841
2842 static void doStats(void)
2843 {
2844 static time_t lastOutputTime;
2845 static uint64_t lastQueryCount;
2846
2847 uint64_t cacheHits = s_RC->cacheHits;
2848 uint64_t cacheMisses = s_RC->cacheMisses;
2849 uint64_t cacheSize = s_RC->size();
2850
2851 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2852 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2853 cacheSize << " cache entries, "<<
2854 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2855 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2856
2857 g_log<<Logger::Notice<<"stats: throttle map: "
2858 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2859 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<", failed ns: "
2860 << broadcastAccFunction<uint64_t>(pleaseGetFailedServersSize)<<", ednsmap: "
2861 <<broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize)<<endl;
2862 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2863 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2864 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2865 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2866 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2867
2868 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2869 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2870
2871 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2872 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2873
2874 size_t idx = 0;
2875 for (const auto& threadInfo : s_threadInfos) {
2876 if(threadInfo.isWorker) {
2877 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
2878 ++idx;
2879 }
2880 }
2881
2882 time_t now = time(0);
2883 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2884 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2885 }
2886 lastOutputTime = now;
2887 lastQueryCount = SyncRes::s_queries;
2888 }
2889 else if(statsWanted)
2890 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
2891
2892 statsWanted=false;
2893 }
2894
2895 static void houseKeeping(void *)
2896 {
2897 static thread_local time_t last_rootupdate, last_secpoll, last_trustAnchorUpdate{0}, last_RC_prune;
2898 static thread_local struct timeval last_prune;
2899
2900 static thread_local int cleanCounter=0;
2901 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2902 auto luaconfsLocal = g_luaconfs.getLocal();
2903
2904 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2905 // Loading the Lua config file already "refreshed" the TAs
2906 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2907 }
2908
2909 try {
2910 if(s_running) {
2911 return;
2912 }
2913 s_running=true;
2914
2915 struct timeval now, past;
2916 Utility::gettimeofday(&now, nullptr);
2917 past = now;
2918 past.tv_sec -= 5;
2919 if (last_prune < past) {
2920 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2921 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2922
2923 time_t limit;
2924 if(!((cleanCounter++)%40)) { // this is a full scan!
2925 limit=now.tv_sec-300;
2926 SyncRes::pruneNSSpeeds(limit);
2927 }
2928 limit = now.tv_sec - SyncRes::s_serverdownthrottletime * 10;
2929 SyncRes::pruneFailedServers(limit);
2930 limit = now.tv_sec - 2*3600;
2931 SyncRes::pruneEDNSStatuses(limit);
2932 SyncRes::pruneThrottledServers();
2933 Utility::gettimeofday(&last_prune, nullptr);
2934 }
2935
2936 if(isHandlerThread()) {
2937 if (now.tv_sec - last_RC_prune > 5) {
2938 s_RC->doPrune(g_maxCacheEntries);
2939 last_RC_prune = now.tv_sec;
2940 }
2941 // XXX !!! global
2942 if(now.tv_sec - last_rootupdate > 7200) {
2943 int res = SyncRes::getRootNS(g_now, nullptr);
2944 if (!res) {
2945 last_rootupdate=now.tv_sec;
2946 primeRootNSZones(g_dnssecmode != DNSSECMode::Off);
2947 }
2948 }
2949
2950 if(now.tv_sec - last_secpoll >= 3600) {
2951 try {
2952 doSecPoll(&last_secpoll);
2953 }
2954 catch(const std::exception& e)
2955 {
2956 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2957 }
2958 catch(const PDNSException& e)
2959 {
2960 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2961 }
2962 catch(const ImmediateServFailException &e)
2963 {
2964 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2965 }
2966 catch(const PolicyHitException& e) {
2967 g_log<<Logger::Error<<"Policy hit while performing security poll"<<endl;
2968 }
2969 catch(...)
2970 {
2971 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
2972 }
2973 }
2974
2975 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2976 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2977 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2978 try {
2979 map<DNSName, dsmap_t> dsAnchors;
2980 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2981 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2982 lci.dsAnchors = dsAnchors;
2983 });
2984 }
2985 last_trustAnchorUpdate = now.tv_sec;
2986 } catch (const PDNSException &pe) {
2987 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2988 }
2989 }
2990 }
2991 s_running=false;
2992 }
2993 catch(PDNSException& ae)
2994 {
2995 s_running=false;
2996 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2997 throw;
2998 }
2999 }
3000
3001 static void makeThreadPipes()
3002 {
3003 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
3004 if (pipeBufferSize > 0) {
3005 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
3006 }
3007
3008 /* thread 0 is the handler / SNMP, we start at 1 */
3009 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
3010 auto& threadInfos = s_threadInfos.at(n);
3011
3012 int fd[2];
3013 if(pipe(fd) < 0)
3014 unixDie("Creating pipe for inter-thread communications");
3015
3016 threadInfos.pipes.readToThread = fd[0];
3017 threadInfos.pipes.writeToThread = fd[1];
3018
3019 if(pipe(fd) < 0)
3020 unixDie("Creating pipe for inter-thread communications");
3021
3022 threadInfos.pipes.readFromThread = fd[0];
3023 threadInfos.pipes.writeFromThread = fd[1];
3024
3025 if(pipe(fd) < 0)
3026 unixDie("Creating pipe for inter-thread communications");
3027
3028 threadInfos.pipes.readQueriesToThread = fd[0];
3029 threadInfos.pipes.writeQueriesToThread = fd[1];
3030
3031 if (pipeBufferSize > 0) {
3032 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
3033 int err = errno;
3034 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(err)<<endl;
3035 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
3036 if (existingSize > 0) {
3037 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
3038 }
3039 }
3040 }
3041
3042 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
3043 unixDie("Making pipe for inter-thread communications non-blocking");
3044 }
3045 }
3046 }
3047
/* Message exchanged between threads: a pointer to a heap-allocated ThreadMSG is
   written to the receiving thread's pipe, and the receiver (handlePipeRequest)
   runs func and then deletes the message. */
struct ThreadMSG
{
  pipefunc_t func;  // the callable to run on the receiving thread
  bool wantAnswer;  // when true, the receiver writes the pointer returned by func back to the sender
};
3053
3054 void broadcastFunction(const pipefunc_t& func)
3055 {
3056 /* This function might be called by the worker with t_id 0 during startup
3057 for the initialization of ACLs and domain maps. After that it should only
3058 be called by the handler. */
3059
3060 if (s_threadInfos.empty() && isHandlerThread()) {
3061 /* the handler and distributors will call themselves below, but
3062 during startup we get called while s_threadInfos has not been
3063 populated yet to update the ACL or domain maps, so we need to
3064 handle that case.
3065 */
3066 func();
3067 }
3068
3069 unsigned int n = 0;
3070 for (const auto& threadInfo : s_threadInfos) {
3071 if(n++ == t_id) {
3072 func(); // don't write to ourselves!
3073 continue;
3074 }
3075
3076 ThreadMSG* tmsg = new ThreadMSG();
3077 tmsg->func = func;
3078 tmsg->wantAnswer = true;
3079 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3080 delete tmsg;
3081
3082 unixDie("write to thread pipe returned wrong size or error");
3083 }
3084
3085 string* resp = nullptr;
3086 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3087 unixDie("read from thread pipe returned wrong size or error");
3088
3089 if(resp) {
3090 delete resp;
3091 resp = nullptr;
3092 }
3093 }
3094 }
3095
3096 static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
3097 {
3098 auto& targetInfo = s_threadInfos[target];
3099 if(!targetInfo.isWorker) {
3100 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
3101 exit(1);
3102 }
3103
3104 const auto& tps = targetInfo.pipes;
3105
3106 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
3107 if (written > 0) {
3108 if (static_cast<size_t>(written) != sizeof(tmsg)) {
3109 delete tmsg;
3110 unixDie("write to thread pipe returned wrong size or error");
3111 }
3112 }
3113 else {
3114 int error = errno;
3115 if (error == EAGAIN || error == EWOULDBLOCK) {
3116 return false;
3117 } else {
3118 delete tmsg;
3119 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
3120 }
3121 }
3122
3123 ++targetInfo.numberOfDistributedQueries;
3124
3125 return true;
3126 }
3127
3128 static unsigned int getWorkerLoad(size_t workerIdx)
3129 {
3130 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
3131 if (mt != nullptr) {
3132 return mt->numProcesses();
3133 }
3134 return 0;
3135 }
3136
3137 static unsigned int selectWorker(unsigned int hash)
3138 {
3139 if (s_balancingFactor == 0) {
3140 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
3141 }
3142
3143 /* we start with one, representing the query we are currently handling */
3144 double currentLoad = 1;
3145 std::vector<unsigned int> load(g_numWorkerThreads);
3146 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3147 load[idx] = getWorkerLoad(idx);
3148 currentLoad += load[idx];
3149 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3150 }
3151
3152 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3153 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3154
3155 unsigned int worker = hash % g_numWorkerThreads;
3156 /* at least one server has to be at or below the average load */
3157 if (load[worker] > targetLoad) {
3158 ++g_stats.rebalancedQueries;
3159 do {
3160 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3161 worker = (worker + 1) % g_numWorkerThreads;
3162 }
3163 while(load[worker] > targetLoad);
3164 }
3165
3166 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3167 }
3168
3169 // This function is only called by the distributor threads, when pdns-distributes-queries is set
3170 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3171 {
3172 if (!isDistributorThread()) {
3173 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3174 exit(1);
3175 }
3176
3177 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
3178 unsigned int target = selectWorker(hash);
3179
3180 ThreadMSG* tmsg = new ThreadMSG();
3181 tmsg->func = func;
3182 tmsg->wantAnswer = false;
3183
3184 if (!trySendingQueryToWorker(target, tmsg)) {
3185 /* if this function failed but did not raise an exception, it means that the pipe
3186 was full, let's try another one */
3187 unsigned int newTarget = 0;
3188 do {
3189 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3190 } while (newTarget == target);
3191
3192 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3193 g_stats.queryPipeFullDrops++;
3194 delete tmsg;
3195 }
3196 }
3197 }
3198
3199 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
3200 {
3201 ThreadMSG* tmsg = nullptr;
3202
3203 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
3204 unixDie("read from thread pipe returned wrong size or error");
3205 }
3206
3207 void *resp=0;
3208 try {
3209 resp = tmsg->func();
3210 }
3211 catch(std::exception& e) {
3212 if(g_logCommonErrors)
3213 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
3214 }
3215 catch(PDNSException& e) {
3216 if(g_logCommonErrors)
3217 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
3218 }
3219 if(tmsg->wantAnswer) {
3220 const auto& threadInfo = s_threadInfos.at(t_id);
3221 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
3222 delete tmsg;
3223 unixDie("write to thread pipe returned wrong size or error");
3224 }
3225 }
3226
3227 delete tmsg;
3228 }
3229
3230 template<class T> void *voider(const boost::function<T*()>& func)
3231 {
3232 return func();
3233 }
3234
3235 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3236 {
3237 a.insert(a.end(), b.begin(), b.end());
3238 return a;
3239 }
3240
/* Appends all elements of b to the end of a and returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  a.reserve(a.size() + b.size());
  for (const auto& entry : b) {
    a.push_back(entry);
  }
  return a;
}
3246
3247 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3248 {
3249 a.insert(a.end(), b.begin(), b.end());
3250 return a;
3251 }
3252
3253
3254 /*
3255 This function should only be called by the handler to gather metrics, wipe the cache,
3256 reload the Lua script (not the Lua config) or change the current trace regex,
3257 and by the SNMP thread to gather metrics. */
3258 template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3259 {
3260 if (!isHandlerThread()) {
3261 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
3262 exit(1);
3263 }
3264
3265 unsigned int n = 0;
3266 T ret=T();
3267 for (const auto& threadInfo : s_threadInfos) {
3268 if (n++ == t_id) {
3269 continue;
3270 }
3271
3272 const auto& tps = threadInfo.pipes;
3273 ThreadMSG* tmsg = new ThreadMSG();
3274 tmsg->func = boost::bind(voider<T>, func);
3275 tmsg->wantAnswer = true;
3276
3277 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3278 delete tmsg;
3279 unixDie("write to thread pipe returned wrong size or error");
3280 }
3281
3282 T* resp = nullptr;
3283 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3284 unixDie("read from thread pipe returned wrong size or error");
3285
3286 if(resp) {
3287 ret += *resp;
3288 delete resp;
3289 resp = nullptr;
3290 }
3291 }
3292 return ret;
3293 }
3294
// Explicit instantiations of broadcastAccFunction for the result types listed below.
template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun); // explicit instantiation
3300
3301 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
3302 {
3303 try {
3304 string remote;
3305 string msg=s_rcc.recv(&remote);
3306 RecursorControlParser rcp;
3307 RecursorControlParser::func_t* command;
3308
3309 string answer=rcp.getAnswer(msg, &command);
3310
3311 // If we are inside a chroot, we need to strip
3312 if (!arg()["chroot"].empty()) {
3313 size_t len = arg()["chroot"].length();
3314 remote = remote.substr(len);
3315 }
3316
3317 s_rcc.send(answer, &remote);
3318 command();
3319 }
3320 catch(const std::exception& e) {
3321 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
3322 }
3323 catch(const PDNSException& ae) {
3324 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
3325 }
3326 }
3327
3328 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
3329 {
3330 PacketID* pident=any_cast<PacketID>(&var);
3331 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3332
3333 shared_array<char> buffer(new char[pident->inNeeded]);
3334
3335 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
3336 if(ret > 0) {
3337 pident->inMSG.append(&buffer[0], &buffer[ret]);
3338 pident->inNeeded-=(size_t)ret;
3339 if(!pident->inNeeded || pident->inIncompleteOkay) {
3340 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3341 PacketID pid=*pident;
3342 string msg=pident->inMSG;
3343
3344 t_fdm->removeReadFD(fd);
3345 MT->sendEvent(pid, &msg);
3346 }
3347 else {
3348 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3349 }
3350 }
3351 else {
3352 PacketID tmp=*pident;
3353 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
3354 string empty;
3355 MT->sendEvent(tmp, &empty); // this conveys error status
3356 }
3357 }
3358
3359 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
3360 {
3361 PacketID* pid=any_cast<PacketID>(&var);
3362 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
3363 if(ret > 0) {
3364 pid->outPos+=(ssize_t)ret;
3365 if(pid->outPos==pid->outMSG.size()) {
3366 PacketID tmp=*pid;
3367 t_fdm->removeWriteFD(fd);
3368 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3369 }
3370 }
3371 else { // error or EOF
3372 PacketID tmp(*pid);
3373 t_fdm->removeWriteFD(fd);
3374 string sent;
3375 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
3376 }
3377 }
3378
3379 // resend event to everybody chained onto it
3380 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
3381 {
3382 if(iter->key.chain.empty())
3383 return;
3384 // cerr<<"doResends called!\n";
3385 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3386 resend.fd=-1;
3387 resend.id=*i;
3388 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3389
3390 MT->sendEvent(resend, &content);
3391 g_stats.chainResends++;
3392 }
3393 }
3394
/* Multiplexer callback for an outgoing UDP socket that became readable.

   Reads one datagram (at most g_outgoingEDNSBufsize bytes) and tries to deliver it
   to the mthread waiting for it, identified by source address, DNS id, socket, and
   the name and type of the first question in the packet.

   - A read error or a packet shorter than a DNS header returns the socket and sends
     an empty string (denoting an error) to the waiter registered under the PacketID
     stored with the descriptor, and to the waiters chained onto it.
   - A packet whose question cannot be parsed is counted, logged and dropped.
   - If no waiter matches, the waiters are scanned: near misses are counted and, for
     a response without a usable question, a waiter with the same id and remote is
     retried with its name and type filled in. If still nothing matches, the packet
     is counted as unexpected. */
static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
{
  PacketID pid=any_cast<PacketID>(var);
  ssize_t len;
  std::string packet;
  packet.resize(g_outgoingEDNSBufsize);
  ComboAddress fromaddr;
  socklen_t addrlen=sizeof(fromaddr);

  len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);

  // too short to hold a DNS header (or an outright read error)
  if(len < (ssize_t) sizeof(dnsheader)) {
    if(len < 0)
      ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
    else {
      g_stats.serverParseError++;
      if(g_logCommonErrors)
        g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
          ": packet smaller than DNS header"<<endl;
    }

    t_udpclientsocks->returnSocket(fd);
    string empty;

    // chained waiters get the error as well
    MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
    if(iter != MT->d_waiters.end())
      doResends(iter, pid, empty);

    MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
    return;
  }

  packet.resize(len);
  dnsheader dh;
  memcpy(&dh, &packet.at(0), sizeof(dh));

  // build the key from what was actually received, not from what we expected
  PacketID pident;
  pident.remote=fromaddr;
  pident.id=dh.id;
  pident.fd=fd;

  if(!dh.qr && g_logCommonErrors) {
    g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
  }

  if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
     !dh.qr) {      // one weird server
    pident.domain.clear();
    pident.type = 0;
  }
  else {
    try {
      if(len > 12)
        pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
    }
    catch(std::exception& e) {
      g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
      g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
      return;
    }
  }

  // pass the packet on to the waiters chained onto the matching one, if any
  MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
  if(iter != MT->d_waiters.end()) {
    doResends(iter, pident, packet);
  }

 retryWithName:

  if(!MT->sendEvent(pident, &packet)) {
    /* we did not find a match for this response, something is wrong */

    // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
    for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
      // same socket, remote, type and name: only the id differs, count it as a near miss
      if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote &&  mthread->key.type == pident.type &&
         pident.domain == mthread->key.domain) {
        mthread->key.nearMisses++;
      }

      // be a bit paranoid here since we're weakening our matching
      if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
         pident.id  == mthread->key.id && mthread->key.remote == pident.remote) {
        // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
        pident.domain = mthread->key.domain;
        pident.type = mthread->key.type;
        goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
      }
    }
    g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
    if(g_logCommonErrors) {
      g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
    }
  }
  else if(fd >= 0) {
    /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
    t_udpclientsocks->returnSocket(fd);
  }
}
3493
3494 FDMultiplexer* getMultiplexer()
3495 {
3496 FDMultiplexer* ret;
3497 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
3498 try {
3499 ret=i.second();
3500 return ret;
3501 }
3502 catch(FDMultiplexerException &fe) {
3503 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
3504 }
3505 catch(...) {
3506 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
3507 }
3508 }
3509 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
3510 exit(1);
3511 }
3512
3513
3514 static string* doReloadLuaScript()
3515 {
3516 string fname= ::arg()["lua-dns-script"];
3517 try {
3518 if(fname.empty()) {
3519 t_pdl.reset();
3520 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
3521 return new string("unloaded\n");
3522 }
3523 else {
3524 t_pdl = std::make_shared<RecursorLua4>();
3525 t_pdl->loadFile(fname);
3526 }
3527 }
3528 catch(std::exception& e) {
3529 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
3530 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
3531 }
3532
3533 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
3534 return new string("(re)loaded '"+fname+"'\n");
3535 }
3536
3537 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3538 {
3539 if(begin != end)
3540 ::arg().set("lua-dns-script") = *begin;
3541
3542 return broadcastAccFunction<string>(doReloadLuaScript);
3543 }
3544
3545 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
3546 try
3547 {
3548 if(newRegex.empty()) {
3549 t_traceRegex.reset();
3550 return new string("unset\n");
3551 }
3552 else {
3553 t_traceRegex = std::make_shared<Regex>(newRegex);
3554 return new string("ok\n");
3555 }
3556 }
3557 catch(PDNSException& ae)
3558 {
3559 return new string(ae.reason+"\n");
3560 }
3561
3562 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3563 {
3564 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3565 }
3566
3567 static void checkLinuxIPv6Limits()
3568 {
3569 #ifdef __linux__
3570 string line;
3571 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
3572 int lim=std::stoi(line);
3573 if(lim < 16384) {
3574 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
3575 }
3576 }
3577 #endif
3578 }
3579 static void checkOrFixFDS()
3580 {
3581 unsigned int availFDs=getFilenumLimit();
3582 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3583
3584 if(wantFDs > availFDs) {
3585 unsigned int hardlimit= getFilenumLimit(true);
3586 if(hardlimit >= wantFDs) {
3587 setFilenumLimit(wantFDs);
3588 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
3589 }
3590 else {
3591 int newval = (hardlimit - 25) / g_numWorkerThreads;
3592 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
3593 g_maxMThreads = newval;
3594 setFilenumLimit(hardlimit);
3595 }
3596 }
3597 }
3598
// Forward declaration; the definition is not part of this section of the file.
static void* recursorThread(unsigned int tid, const string& threadName);
3600
3601 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
3602 {
3603 t_allowFrom = ng;
3604 return nullptr;
3605 }
3606
// Command-line argument count and vector; parseACLs() passes them to ::arg().preParse()
// when it re-reads the allow-from settings.
int g_argc;
char** g_argv;
3609
3610 void parseACLs()
3611 {
3612 static bool l_initialized;
3613
3614 if(l_initialized) { // only reload configuration file on second call
3615 string configname=::arg()["config-dir"]+"/recursor.conf";
3616 if(::arg()["config-name"]!="") {
3617 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3618 }
3619 cleanSlashes(configname);
3620
3621 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
3622 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
3623 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
3624 ::arg().preParseFile(configname.c_str(), "include-dir");
3625 ::arg().preParse(g_argc, g_argv, "include-dir");
3626
3627 // then process includes
3628 std::vector<std::string> extraConfigs;
3629 ::arg().gatherIncludes(extraConfigs);
3630
3631 for(const std::string& fn : extraConfigs) {
3632 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3633 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3634 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3635 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3636 }
3637
3638 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3639 ::arg().preParse(g_argc, g_argv, "allow-from");
3640 }
3641
3642 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3643 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3644
3645 if(!::arg()["allow-from-file"].empty()) {
3646 string line;
3647 ifstream ifs(::arg()["allow-from-file"].c_str());
3648 if(!ifs) {
3649 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3650 }
3651
3652 string::size_type pos;
3653 while(getline(ifs,line)) {
3654 pos=line.find('#');
3655 if(pos!=string::npos)
3656 line.resize(pos);
3657 trim(line);
3658 if(line.empty())
3659 continue;
3660
3661 allowFrom->addMask(line);
3662 }
3663 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
3664 }
3665 else if(!::arg()["allow-from"].empty()) {
3666 vector<string> ips;
3667 stringtok(ips, ::arg()["allow-from"], ", ");
3668
3669 g_log<<Logger::Warning<<"Only allowing queries from: ";
3670 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3671 allowFrom->addMask(*i);
3672 if(i!=ips.begin())
3673 g_log<<Logger::Warning<<", ";
3674 g_log<<Logger::Warning<<*i;
3675 }
3676 g_log<<Logger::Warning<<endl;
3677 }
3678 else {
3679 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3680 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
3681 allowFrom = nullptr;
3682 }
3683
3684 g_initialAllowFrom = allowFrom;
3685 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
3686 oldAllowFrom = nullptr;
3687
3688 l_initialized = true;
3689 }
3690
3691
3692 static void setupDelegationOnly()
3693 {
3694 vector<string> parts;
3695 stringtok(parts, ::arg()["delegation-only"], ", \t");
3696 for(const auto& p : parts) {
3697 SyncRes::addDelegationOnly(DNSName(p));
3698 }
3699 }
3700
3701 static std::map<unsigned int, std::set<int> > parseCPUMap()
3702 {
3703 std::map<unsigned int, std::set<int> > result;
3704
3705 const std::string value = ::arg()["cpu-map"];
3706
3707 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
3708 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
3709 return result;
3710 }
3711
3712 std::vector<std::string> parts;
3713
3714 stringtok(parts, value, " \t");
3715
3716 for(const auto& part : parts) {
3717 if (part.find('=') == string::npos)
3718 continue;
3719
3720 try {
3721 auto headers = splitField(part, '=');
3722 trim(headers.first);
3723 trim(headers.second);
3724
3725 unsigned int threadId = pdns_stou(headers.first);
3726 std::vector<std::string> cpus;
3727
3728 stringtok(cpus, headers.second, ",");
3729
3730 for(const auto& cpu : cpus) {
3731 int cpuId = std::stoi(cpu);
3732
3733 result[threadId].insert(cpuId);
3734 }
3735 }
3736 catch(const std::exception& e) {
3737 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
3738 }
3739 }
3740
3741 return result;
3742 }
3743
3744 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3745 {
3746 const auto& cpuMapping = cpusMap.find(n);
3747 if (cpuMapping != cpusMap.cend()) {
3748 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3749 if (rc == 0) {
3750 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
3751 for (const auto cpu : cpuMapping->second) {
3752 g_log<<Logger::Info<<" "<<cpu;
3753 }
3754 g_log<<Logger::Info<<endl;
3755 }
3756 else {
3757 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
3758 for (const auto cpu : cpuMapping->second) {
3759 g_log<<Logger::Info<<" "<<cpu;
3760 }
3761 g_log<<Logger::Info<<strerror(rc)<<endl;
3762 }
3763 }
3764 }
3765
3766 #ifdef NOD_ENABLED
3767 static void setupNODThread()
3768 {
3769 if (g_nodEnabled) {
3770 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3771 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
3772 try {
3773 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3774 }
3775 catch (const PDNSException& e) {
3776 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3777 _exit(1);
3778 }
3779 if (!t_nodDBp->init()) {
3780 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3781 _exit(1);
3782 }
3783 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
3784 t.detach();
3785 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
3786 }
3787 if (g_udrEnabled) {
3788 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3789 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
3790 try {
3791 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3792 }
3793 catch (const PDNSException& e) {
3794 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3795 _exit(1);
3796 }
3797 if (!t_udrDBp->init()) {
3798 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3799 _exit(1);
3800 }
3801 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
3802 t.detach();
3803 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
3804 }
3805 }
3806
3807 void parseNODWhitelist(const std::string& wlist)
3808 {
3809 vector<string> parts;
3810 stringtok(parts, wlist, ",; ");
3811 for(const auto& a : parts) {
3812 g_nodDomainWL.add(DNSName(a));
3813 }
3814 }
3815
3816 static void setupNODGlobal()
3817 {
3818 // Setup NOD subsystem
3819 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3820 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3821 g_nodLog = ::arg().mustDo("new-domain-log");
3822 parseNODWhitelist(::arg()["new-domain-whitelist"]);
3823
3824 // Setup Unique DNS Response subsystem
3825 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3826 g_udrLog = ::arg().mustDo("unique-response-log");
3827 }
3828 #endif /* NOD_ENABLED */
3829
3830 static void checkSocketDir(void)
3831 {
3832 struct stat st;
3833 string dir(::arg()["socket-dir"]);
3834 string msg;
3835
3836 if (stat(dir.c_str(), &st) == -1) {
3837 msg = "it does not exist or cannot access";
3838 }
3839 else if (!S_ISDIR(st.st_mode)) {
3840 msg = "it is not a directory";
3841 }
3842 else if (access(dir.c_str(), R_OK | W_OK | X_OK) != 0) {
3843 msg = "cannot read, write or search";
3844 } else {
3845 return;
3846 }
3847 g_log << Logger::Error << "Problem with socket directory " << dir << ": " << msg << "; see https://docs.powerdns.com/recursor/upgrade.html#x-to-4-3-0-or-master" << endl;
3848 _exit(1);
3849 }
3850
3851 static int serviceMain(int argc, char*argv[])
3852 {
3853 g_log.setName(s_programname);
3854 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3855 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
3856
3857 if(!::arg()["logging-facility"].empty()) {
3858 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3859 if(val >= 0)
3860 g_log.setFacility(val);
3861 else
3862 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
3863 }
3864
3865 showProductVersion();
3866
3867 g_disthashseed=dns_random(0xffffffff);
3868
3869 checkLinuxIPv6Limits();
3870 try {
3871 vector<string> addrs;
3872 if(!::arg()["query-local-address6"].empty()) {
3873 SyncRes::s_doIPv6=true;
3874 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3875
3876 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3877 for(const string& addr : addrs) {
3878 g_localQueryAddresses6.push_back(ComboAddress(addr));
3879 }
3880 }
3881 else {
3882 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
3883 }
3884 addrs.clear();
3885 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3886 for(const string& addr : addrs) {
3887 g_localQueryAddresses4.push_back(ComboAddress(addr));
3888 }
3889 }
3890 catch(std::exception& e) {
3891 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
3892 exit(99);
3893 }
3894
3895 // keep this ABOVE loadRecursorLuaConfig!
3896 if(::arg()["dnssec"]=="off")
3897 g_dnssecmode=DNSSECMode::Off;
3898 else if(::arg()["dnssec"]=="process-no-validate")
3899 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3900 else if(::arg()["dnssec"]=="process")
3901 g_dnssecmode=DNSSECMode::Process;
3902 else if(::arg()["dnssec"]=="validate")
3903 g_dnssecmode=DNSSECMode::ValidateAll;
3904 else if(::arg()["dnssec"]=="log-fail")
3905 g_dnssecmode=DNSSECMode::ValidateForLog;
3906 else {
3907 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
3908 exit(1);
3909 }
3910
3911 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3912 if (g_signatureInceptionSkew < 0) {
3913 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3914 exit(1);
3915 }
3916
3917 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
3918 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
3919
3920 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3921 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
3922
3923 luaConfigDelayedThreads delayedLuaThreads;
3924 try {
3925 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
3926 }
3927 catch (PDNSException &e) {
3928 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
3929 exit(1);
3930 }
3931
3932 parseACLs();
3933 initPublicSuffixList(::arg()["public-suffix-list-file"]);
3934
3935 if(!::arg()["dont-query"].empty()) {
3936 vector<string> ips;
3937 stringtok(ips, ::arg()["dont-query"], ", ");
3938 ips.push_back("0.0.0.0");
3939 ips.push_back("::");
3940
3941 g_log<<Logger::Warning<<"Will not send queries to: ";
3942 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3943 SyncRes::addDontQuery(*i);
3944 if(i!=ips.begin())
3945 g_log<<Logger::Warning<<", ";
3946 g_log<<Logger::Warning<<*i;
3947 }
3948 g_log<<Logger::Warning<<endl;
3949 }
3950
3951 g_quiet=::arg().mustDo("quiet");
3952
3953 /* this needs to be done before parseACLs(), which call broadcastFunction() */
3954 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3955 if(g_weDistributeQueries) {
3956 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
3957 }
3958
3959 setupDelegationOnly();
3960 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
3961
3962 if(::arg()["trace"]=="fail") {
3963 SyncRes::setDefaultLogMode(SyncRes::Store);
3964 }
3965 else if(::arg().mustDo("trace")) {
3966 SyncRes::setDefaultLogMode(SyncRes::Log);
3967 ::arg().set("quiet")="no";
3968 g_quiet=false;
3969 g_dnssecLOG=true;
3970 }
3971 string myHostname = getHostname();
3972 if (myHostname == "UNKNOWN"){
3973 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3974 myHostname = "";
3975 }
3976
3977 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3978 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
3979
3980 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3981
3982 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
3983 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
3984 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
3985 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
3986 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3987 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3988 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
3989 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3990 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
3991 SyncRes::s_serverID=::arg()["server-id"];
3992 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3993 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3994 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3995 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3996 if(SyncRes::s_serverID.empty()) {
3997 SyncRes::s_serverID = myHostname;
3998 }
3999
4000 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
4001 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
4002 SyncRes::clearECSStats();
4003 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
4004 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
4005 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
4006
4007 SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
4008
4009 if (SyncRes::s_qnameminimization) {
4010 // With an empty cache, a rev ipv6 query with dnssec enabled takes
4011 // almost 100 queries. Default maxqperq is 60.
4012 SyncRes::s_maxqperq = std::max(SyncRes::s_maxqperq, static_cast<unsigned int>(100));
4013 }
4014
4015 SyncRes::s_hardenNXD = SyncRes::HardenNXD::DNSSEC;
4016 string value = ::arg()["nothing-below-nxdomain"];
4017 if (value == "yes") {
4018 SyncRes::s_hardenNXD = SyncRes::HardenNXD::Yes;
4019 } else if (value == "no") {
4020 SyncRes::s_hardenNXD = SyncRes::HardenNXD::No;
4021 } else if (value != "dnssec") {
4022 g_log << Logger::Error << "Unknown nothing-below-nxdomain mode: " << value << endl;
4023 exit(1);
4024 }
4025
4026 if (!::arg().isEmpty("ecs-scope-zero-address")) {
4027 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
4028 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
4029 }
4030 else {
4031 bool found = false;
4032 for (const auto& addr : g_localQueryAddresses4) {
4033 if (!IsAnyAddress(addr)) {
4034 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
4035 found = true;
4036 break;
4037 }
4038 }
4039 if (!found) {
4040 for (const auto& addr : g_localQueryAddresses6) {
4041 if (!IsAnyAddress(addr)) {
4042 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
4043 found = true;
4044 break;
4045 }
4046 }
4047 if (!found) {
4048 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
4049 }
4050 }
4051 }
4052
4053 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
4054 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
4055 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
4056
4057 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
4058 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
4059
4060 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
4061
4062 g_initialDomainMap = parseAuthAndForwards();
4063
4064 g_latencyStatSize=::arg().asNum("latency-statistic-size");
4065
4066 g_logCommonErrors=::arg().mustDo("log-common-errors");
4067 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
4068
4069 g_anyToTcp = ::arg().mustDo("any-to-tcp");
4070 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
4071
4072 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
4073
4074 g_numDistributorThreads = ::arg().asNum("distributor-threads");
4075 g_numWorkerThreads = ::arg().asNum("threads");
4076 if (g_numWorkerThreads < 1) {
4077 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
4078 g_numWorkerThreads = 1;
4079 }
4080
4081 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
4082 g_maxMThreads = ::arg().asNum("max-mthreads");
4083
4084
4085 int64_t maxInFlight = ::arg().asNum("max-concurrent-requests-per-tcp-connection");
4086 if (maxInFlight < 1 || maxInFlight > USHRT_MAX || maxInFlight >= g_maxMThreads) {
4087 g_log<<Logger::Warning<<"Asked to run with illegal max-concurrent-requests-per-tcp-connection, setting to default (10)"<<endl;
4088 TCPConnection::s_maxInFlight = 10;
4089 } else {
4090 TCPConnection::s_maxInFlight = maxInFlight;
4091 }
4092
4093
4094 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
4095
4096 g_statisticsInterval = ::arg().asNum("statistics-interval");
4097
4098 {
4099 SuffixMatchNode dontThrottleNames;
4100 vector<string> parts;
4101 stringtok(parts, ::arg()["dont-throttle-names"], " ,");
4102 for (const auto &p : parts) {
4103 dontThrottleNames.add(DNSName(p));
4104 }
4105 g_dontThrottleNames.setState(std::move(dontThrottleNames));
4106
4107 NetmaskGroup dontThrottleNetmasks;
4108 stringtok(parts, ::arg()["dont-throttle-netmasks"], " ,");
4109 for (const auto &p : parts) {
4110 dontThrottleNetmasks.addMask(Netmask(p));
4111 }
4112 g_dontThrottleNetmasks.setState(std::move(dontThrottleNetmasks));
4113 }
4114
4115 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
4116 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
4117 s_balancingFactor = 0.0;
4118 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
4119 }
4120
4121 #ifdef SO_REUSEPORT
4122 g_reusePort = ::arg().mustDo("reuseport");
4123 #endif
4124
4125 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
4126
4127 if (g_reusePort) {
4128 if (g_weDistributeQueries) {
4129 /* first thread is the handler, then distributors */
4130 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4131 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
4132 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
4133 makeUDPServerSockets(deferredAdds);
4134 makeTCPServerSockets(deferredAdds, tcpSockets);
4135 }
4136 }
4137 else {
4138 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4139 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4140 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
4141 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
4142 makeUDPServerSockets(deferredAdds);
4143 makeTCPServerSockets(deferredAdds, tcpSockets);
4144 }
4145 }
4146 }
4147 else {
4148 std::set<int> tcpSockets;
4149 /* we don't have reuseport so we can only open one socket per
4150 listening addr:port and everyone will listen on it */
4151 makeUDPServerSockets(g_deferredAdds);
4152 makeTCPServerSockets(g_deferredAdds, tcpSockets);
4153
4154 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
4155 needs to listen to the shared sockets */
4156 if (g_weDistributeQueries) {
4157 /* first thread is the handler, then distributors */
4158 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4159 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4160 }
4161 }
4162 else {
4163 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4164 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4165 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4166 }
4167 }
4168 }
4169
4170 #ifdef NOD_ENABLED
4171 // Setup newly observed domain globals
4172 setupNODGlobal();
4173 #endif /* NOD_ENABLED */
4174
4175 int forks;
4176 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
4177 if(!fork()) // we are child
4178 break;
4179 }
4180
4181 if(::arg().mustDo("daemon")) {
4182 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
4183 g_log.toConsole(Logger::Critical);
4184 daemonize();
4185 }
4186 if(Utility::getpid() == 1) {
4187 /* We are running as pid 1, register sigterm and sigint handler
4188
4189 The Linux kernel will handle SIGTERM and SIGINT for all processes, except PID 1.
4190 It assumes that the processes running as pid 1 is an "init" like system.
4191 For years, this was a safe assumption, but containers change that: in
4192 most (all?) container implementations, the application itself is running
4193 as pid 1. This means that sending signals to those applications, will not
4194 be handled by default. Results might be "your container not responsing
4195 when asking it to stop", or "ctrl-c not working even when the app is
4196 running in the foreground inside a container".
4197
4198 So TL;DR: If we're running pid 1 (container), we should handle SIGTERM and SIGINT ourselves */
4199
4200 signal(SIGTERM,termIntHandler);
4201 signal(SIGINT,termIntHandler);
4202 }
4203
4204 signal(SIGUSR1,usr1Handler);
4205 signal(SIGUSR2,usr2Handler);
4206 signal(SIGPIPE,SIG_IGN);
4207
4208 checkOrFixFDS();
4209
4210 #ifdef HAVE_LIBSODIUM
4211 if (sodium_init() == -1) {
4212 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
4213 exit(99);
4214 }
4215 #endif
4216
4217 openssl_thread_setup();
4218 openssl_seed();
4219 /* setup rng before chroot */
4220 dns_random_init();
4221
4222 if(::arg()["server-id"].empty()) {
4223 ::arg().set("server-id") = myHostname;
4224 }
4225
4226 int newgid=0;
4227 if(!::arg()["setgid"].empty())
4228 newgid = strToGID(::arg()["setgid"]);
4229 int newuid=0;
4230 if(!::arg()["setuid"].empty())
4231 newuid = strToUID(::arg()["setuid"]);
4232
4233 Utility::dropGroupPrivs(newuid, newgid);
4234
4235 if (!::arg()["chroot"].empty()) {
4236 #ifdef HAVE_SYSTEMD
4237 char *ns;
4238 ns = getenv("NOTIFY_SOCKET");
4239 if (ns != nullptr) {
4240 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
4241 exit(1);
4242 }
4243 #endif
4244 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
4245 int err = errno;
4246 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (err)<<", exiting"<<endl;
4247 exit(1);
4248 }
4249 else
4250 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
4251 }
4252
4253 checkSocketDir();
4254
4255 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4256 if(!s_pidfname.empty())
4257 unlink(s_pidfname.c_str()); // remove possible old pid file
4258 writePid();
4259
4260 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4261
4262 Utility::dropUserPrivs(newuid);
4263 try {
4264 /* we might still have capabilities remaining, for example if we have been started as root
4265 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4266 like CAP_NET_BIND_SERVICE.
4267 */
4268 dropCapabilities();
4269 }
4270 catch(const std::exception& e) {
4271 g_log<<Logger::Warning<<e.what()<<endl;
4272 }
4273
4274 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4275
4276 makeThreadPipes();
4277
4278 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4279 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
4280 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
4281 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
4282
4283 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4284
4285 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4286 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4287 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4288 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
4289
4290 if (::arg().mustDo("snmp-agent")) {
4291 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4292 g_snmpAgent->run();
4293 }
4294
4295 int port = ::arg().asNum("udp-source-port-min");
4296 if(port < 1024 || port > 65535){
4297 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
4298 exit(99); // this isn't going to fix itself either
4299 }
4300 s_minUdpSourcePort = port;
4301 port = ::arg().asNum("udp-source-port-max");
4302 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
4303 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
4304 exit(99); // this isn't going to fix itself either
4305 }
4306 s_maxUdpSourcePort = port;
4307 std::vector<string> parts {};
4308 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
4309 for (const auto &part : parts)
4310 {
4311 port = std::stoi(part);
4312 if(port < 1024 || port > 65535){
4313 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
4314 exit(99); // this isn't going to fix itself either
4315 }
4316 s_avoidUdpSourcePorts.insert(port);
4317 }
4318
4319 unsigned int currentThreadId = 1;
4320 const auto cpusMap = parseCPUMap();
4321
4322 if(g_numThreads == 1) {
4323 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
4324 #ifdef HAVE_SYSTEMD
4325 sd_notify(0, "READY=1");
4326 #endif
4327
4328 /* This thread handles the web server, carbon, statistics and the control channel */
4329 auto& handlerInfos = s_threadInfos.at(0);
4330 handlerInfos.isHandler = true;
4331 handlerInfos.thread = std::thread(recursorThread, 0, "main");
4332
4333 setCPUMap(cpusMap, currentThreadId, pthread_self());
4334
4335 auto& infos = s_threadInfos.at(currentThreadId);
4336 infos.isListener = true;
4337 infos.isWorker = true;
4338 recursorThread(currentThreadId++, "worker");
4339
4340 handlerInfos.thread.join();
4341 }
4342 else {
4343
4344
4345 if (g_weDistributeQueries) {
4346 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4347 auto& infos = s_threadInfos.at(currentThreadId + n);
4348 infos.isListener = true;
4349 }
4350 }
4351 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4352 auto& infos = s_threadInfos.at(currentThreadId + (g_weDistributeQueries ? g_numDistributorThreads : 0) + n);
4353 infos.isListener = !g_weDistributeQueries;
4354 infos.isWorker = true;
4355 }
4356
4357 if (g_weDistributeQueries) {
4358 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4359 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4360 auto& infos = s_threadInfos.at(currentThreadId);
4361 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
4362 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4363 }
4364 }
4365
4366 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4367
4368 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4369 auto& infos = s_threadInfos.at(currentThreadId);
4370 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
4371 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4372 }
4373
4374 #ifdef HAVE_SYSTEMD
4375 sd_notify(0, "READY=1");
4376 #endif
4377
4378 /* This thread handles the web server, carbon, statistics and the control channel */
4379 auto& infos = s_threadInfos.at(0);
4380 infos.isHandler = true;
4381 infos.thread = std::thread(recursorThread, 0, "web+stat");
4382
4383 for (auto & ti : s_threadInfos) {
4384 ti.thread.join();
4385 }
4386 }
4387
4388 #ifdef HAVE_PROTOBUF
4389 google::protobuf::ShutdownProtobufLibrary();
4390 #endif /* HAVE_PROTOBUF */
4391 return 0;
4392 }
4393
4394 static void* recursorThread(unsigned int n, const string& threadName)
4395 try
4396 {
4397 t_id=n;
4398 auto& threadInfo = s_threadInfos.at(t_id);
4399
4400 static string threadPrefix = "pdns-r/";
4401 setThreadName(threadPrefix + threadName);
4402
4403 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
4404 SyncRes::setDomainMap(g_initialDomainMap);
4405 t_allowFrom = g_initialAllowFrom;
4406 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4407 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
4408 primeHints();
4409
4410 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
4411
4412 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
4413
4414 #ifdef NOD_ENABLED
4415 if (threadInfo.isWorker)
4416 setupNODThread();
4417 #endif /* NOD_ENABLED */
4418
4419 /* the listener threads handle TCP queries */
4420 if(threadInfo.isWorker || threadInfo.isListener) {
4421 try {
4422 if(!::arg()["lua-dns-script"].empty()) {
4423 t_pdl = std::make_shared<RecursorLua4>();
4424 t_pdl->loadFile(::arg()["lua-dns-script"]);
4425 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4426 }
4427 }
4428 catch(std::exception &e) {
4429 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4430 _exit(99);
4431 }
4432 }
4433
4434 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
4435 if(ringsize) {
4436 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4437 if(g_weDistributeQueries)
4438 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
4439 else
4440 t_remotes->set_capacity(ringsize);
4441 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4442 t_servfailremotes->set_capacity(ringsize);
4443 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4444 t_bogusremotes->set_capacity(ringsize);
4445 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4446 t_largeanswerremotes->set_capacity(ringsize);
4447 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4448 t_timeouts->set_capacity(ringsize);
4449
4450 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4451 t_queryring->set_capacity(ringsize);
4452 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4453 t_servfailqueryring->set_capacity(ringsize);
4454 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4455 t_bogusqueryring->set_capacity(ringsize);
4456 }
4457
4458 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
4459 threadInfo.mt = MT.get();
4460
4461 #ifdef HAVE_PROTOBUF
4462 /* start protobuf export threads if needed */
4463 auto luaconfsLocal = g_luaconfs.getLocal();
4464 checkProtobufExport(luaconfsLocal);
4465 checkOutgoingProtobufExport(luaconfsLocal);
4466 #endif /* HAVE_PROTOBUF */
4467 #ifdef HAVE_FSTRM
4468 checkFrameStreamExport(luaconfsLocal);
4469 #endif
4470
4471 PacketID pident;
4472
4473 t_fdm=getMultiplexer();
4474
4475 RecursorWebServer *rws = nullptr;
4476
4477 if(threadInfo.isHandler) {
4478 if(::arg().mustDo("webserver")) {
4479 g_log<<Logger::Warning << "Enabling web server" << endl;
4480 try {
4481 rws = new RecursorWebServer(t_fdm);
4482 }
4483 catch(PDNSException &e) {
4484 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
4485 exit(99);
4486 }
4487 }
4488 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
4489 }
4490 else {
4491
4492 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4493 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4494
4495 if (threadInfo.isListener) {
4496 if (g_reusePort) {
4497 /* then every listener has its own FDs */
4498 for(const auto deferred : threadInfo.deferredAdds) {
4499 t_fdm->addReadFD(deferred.first, deferred.second);
4500 }
4501 }
4502 else {
4503 /* otherwise all listeners are listening on the same ones */
4504 for(const auto deferred : g_deferredAdds) {
4505 t_fdm->addReadFD(deferred.first, deferred.second);
4506 }
4507 }
4508 }
4509 }
4510
4511 registerAllStats();
4512
4513 if(threadInfo.isHandler) {
4514 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4515 }
4516
4517 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4518
4519 bool listenOnTCP(true);
4520
4521 time_t last_stat = 0;
4522 time_t last_carbon=0, last_lua_maintenance=0;
4523 time_t carbonInterval=::arg().asNum("carbon-interval");
4524 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
4525 counter.store(0); // used to periodically execute certain tasks
4526
4527 while (!RecursorControlChannel::stop) {
4528 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
4529
4530 if(!(counter%500)) {
4531 MT->makeThread(houseKeeping, 0);
4532 }
4533
4534 if(!(counter%55)) {
4535 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
4536 expired_t expired=t_fdm->getTimeouts(g_now);
4537
4538 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
4539 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4540 if(g_logCommonErrors)
4541 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4542 t_fdm->removeReadFD(i->first);
4543 }
4544 }
4545
4546 counter++;
4547
4548 if(threadInfo.isHandler) {
4549 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4550 doStats();
4551 last_stat = g_now.tv_sec;
4552 }
4553
4554 Utility::gettimeofday(&g_now, 0);
4555
4556 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4557 MT->makeThread(doCarbonDump, 0);
4558 last_carbon = g_now.tv_sec;
4559 }
4560 }
4561 if (t_pdl != nullptr) {
4562 // lua-dns-script directive is present, call the maintenance callback if needed
4563 /* remember that the listener threads handle TCP queries */
4564 if (threadInfo.isWorker || threadInfo.isListener) {
4565 // Only on threads processing queries
4566 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4567 t_pdl->maintenance();
4568 last_lua_maintenance = g_now.tv_sec;
4569 }
4570 }
4571 }
4572
4573 t_fdm->run(&g_now);
4574 // 'run' updates g_now for us
4575
4576 if(threadInfo.isListener) {
4577 if(listenOnTCP) {
4578 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4579 for(const auto fd : threadInfo.tcpSockets) {
4580 t_fdm->removeReadFD(fd);
4581 }
4582 listenOnTCP=false;
4583 }
4584 }
4585 else {
4586 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4587 for(const auto fd : threadInfo.tcpSockets) {
4588 t_fdm->addReadFD(fd, handleNewTCPQuestion);
4589 }
4590 listenOnTCP=true;
4591 }
4592 }
4593 }
4594 }
4595 delete rws;
4596 delete t_fdm;
4597 return 0;
4598 }
4599 catch(PDNSException &ae) {
4600 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4601 return 0;
4602 }
4603 catch(std::exception &e) {
4604 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4605 return 0;
4606 }
4607 catch(...) {
4608 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4609 return 0;
4610 }
4611
4612
4613 int main(int argc, char **argv)
4614 {
4615 g_argc = argc;
4616 g_argv = argv;
4617 g_stats.startupTime=time(0);
4618 Utility::srandom();
4619 versionSetProduct(ProductRecursor);
4620 reportBasicTypes();
4621 reportOtherTypes();
4622
4623 int ret = EXIT_SUCCESS;
4624
4625 try {
4626 ::arg().set("stack-size","stack size per mthread")="200000";
4627 ::arg().set("soa-minimum-ttl","Don't change")="0";
4628 ::arg().set("no-shuffle","Don't change")="off";
4629 ::arg().set("local-port","port to listen on")="53";
4630 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4631 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4632 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4633 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4634 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4635 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4636 ::arg().set("daemon","Operate as a daemon")="no";
4637 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4638 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4639 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4640 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4641 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4642 ::arg().set("chroot","switch to chroot jail")="";
4643 ::arg().set("setgid","If set, change group id to this gid for more security"
4644 #ifdef HAVE_SYSTEMD
4645 #define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
4646 SYSTEMD_SETID_MSG
4647 #endif
4648 )="";
4649 ::arg().set("setuid","If set, change user id to this uid for more security"
4650 #ifdef HAVE_SYSTEMD
4651 SYSTEMD_SETID_MSG
4652 #endif
4653 )="";
4654 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4655 ::arg().set("threads", "Launch this number of threads")="2";
4656 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4657 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4658 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4659 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4660 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4661 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4662 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4663 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4664 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4665 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4666 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
4667 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4668 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4669 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4670 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4671 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4672
4673 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4674 ::arg().set("quiet","Suppress logging of questions and answers")="";
4675 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4676 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
4677 ::arg().set("socket-owner","Owner of socket")="";
4678 ::arg().set("socket-group","Group of socket")="";
4679 ::arg().set("socket-mode", "Permissions for socket")="";
4680
4681 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+"/pdns-recursor when unset and not chrooted" )="";
4682 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4683 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4684 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4685 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4686 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4687 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4688 ::arg().set("max-concurrent-requests-per-tcp-connection", "Maximum number of requests handled concurrently per TCP connection") = "10";
4689 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4690 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4691 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4692 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
4693 ::arg().set("hint-file", "If set, load root hints from this file")="";
4694 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4695 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4696 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4697 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4698 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4699 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4700 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4701 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4702 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4703 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4704 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
4705 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4706 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4707 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4708 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4709 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4710 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4711 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4712 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4713 ::arg().set("lua-config-file", "More powerful configuration options")="";
4714 ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
4715
4716 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4717 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4718 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4719 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4720 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
4721 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
4722 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4723 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
4724 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
4725 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
4726 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
4727 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4728 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
4729 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
4730 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
4731 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
4732 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
4733 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
4734 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
4735 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
4736 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
4737 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4738 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
4739 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
4740 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
4741 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
4742 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4743 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
4744 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
4745 ::arg().set("max-qperq", "Maximum outgoing queries per query")="60";
4746 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
4747 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
4748 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
4749 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
4750 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
4751
4752 ::arg().set("include-dir","Include *.conf files from this directory")="";
4753 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
4754
4755 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
4756
4757 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
4758 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
4759
4760 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4761 for (size_t idx = 0; idx < 32; idx++) {
4762 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4763 }
4764 for (size_t idx = 0; idx < 128; idx++) {
4765 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4766 }
4767 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4768 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4769 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4770 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
4771
4772 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
4773 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
4774
4775 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4776
4777 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4778
4779 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
4780 ::arg().set("xpf-rr-code","XPF option code to use")="0";
4781
4782 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
4783 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4784 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
4785 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
4786 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
4787 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
4788
4789 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="yes";
4790 ::arg().setSwitch("nothing-below-nxdomain", "When an NXDOMAIN exists in cache for a name with fewer labels than the qname, send NXDOMAIN without doing a lookup (see RFC 8020)")="dnssec";
4791 ::arg().set("max-generate-steps", "Maximum number of $GENERATE steps when loading a zone from a file")="0";
4792
4793 #ifdef NOD_ENABLED
4794 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4795 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4796 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4797 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4798 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4799 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
4800 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
4801 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4802 ::arg().set("unique-response-log", "Log unique responses")="yes";
4803 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
4804 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
4805 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
4806 #endif /* NOD_ENABLED */
4807 ::arg().setCmd("help","Provide a helpful message");
4808 ::arg().setCmd("version","Print version string");
4809 ::arg().setCmd("config","Output blank configuration");
4810 ::arg().setDefaults();
4811 g_log.toConsole(Logger::Info);
4812 ::arg().laxParse(argc,argv); // do a lax parse
4813
4814 string configname=::arg()["config-dir"]+"/recursor.conf";
4815 if(::arg()["config-name"]!="") {
4816 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4817 s_programname+="-"+::arg()["config-name"];
4818 }
4819 cleanSlashes(configname);
4820
4821 if(!::arg().getCommands().empty()) {
4822 cerr<<"Fatal: non-option";
4823 if (::arg().getCommands().size() > 1) {
4824 cerr<<"s";
4825 }
4826 cerr<<" (";
4827 bool first = true;
4828 for (auto const c : ::arg().getCommands()) {
4829 if (!first) {
4830 cerr<<", ";
4831 }
4832 first = false;
4833 cerr<<c;
4834 }
4835 cerr<<") on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4836 exit(99);
4837 }
4838
4839 if(::arg().mustDo("config")) {
4840 cout<<::arg().configstring(false, true);
4841 exit(0);
4842 }
4843
4844 if(!::arg().file(configname.c_str()))
4845 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
4846
4847 ::arg().parse(argc,argv);
4848
4849 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4850 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
4851 exit(EXIT_FAILURE);
4852 }
4853
4854 if (::arg()["socket-dir"].empty()) {
4855 if (::arg()["chroot"].empty())
4856 ::arg().set("socket-dir") = std::string(LOCALSTATEDIR) + "/pdns-recursor";
4857 else
4858 ::arg().set("socket-dir") = "/";
4859 }
4860
4861 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
4862
4863 if(::arg().asNum("threads")==1) {
4864 if (::arg().mustDo("pdns-distributes-queries")) {
4865 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4866 ::arg().set("pdns-distributes-queries")="no";
4867 }
4868 }
4869
4870 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4871 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4872 ::arg().set("distributor-threads")="1";
4873 }
4874
4875 if (!::arg().mustDo("pdns-distributes-queries")) {
4876 ::arg().set("distributor-threads")="0";
4877 }
4878
4879 if(::arg().mustDo("help")) {
4880 cout<<"syntax:"<<endl<<endl;
4881 cout<<::arg().helpstring(::arg()["help"])<<endl;
4882 exit(0);
4883 }
4884 if(::arg().mustDo("version")) {
4885 showProductVersion();
4886 showBuildConfiguration();
4887 exit(0);
4888 }
4889
4890 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
4891
4892 if (logUrgency < Logger::Error)
4893 logUrgency = Logger::Error;
4894 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4895 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4896 }
4897 g_log.setLoglevel(logUrgency);
4898 g_log.toConsole(logUrgency);
4899
4900 serviceMain(argc, argv);
4901 }
4902 catch(PDNSException &ae) {
4903 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4904 ret=EXIT_FAILURE;
4905 }
4906 catch(std::exception &e) {
4907 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4908 ret=EXIT_FAILURE;
4909 }
4910 catch(...) {
4911 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4912 ret=EXIT_FAILURE;
4913 }
4914
4915 return ret;
4916 }