]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
Merge pull request #8705 from rgacogne/rec-rpz-order
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
31 #endif
32 #include "ws-recursor.hh"
33 #include <thread>
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
36 #include "utility.hh"
37 #include "dns_random.hh"
38 #ifdef HAVE_LIBSODIUM
39 #include <sodium.h>
40 #endif
41 #include "opensslsigners.hh"
42 #include <iostream>
43 #include <errno.h>
44 #include <boost/static_assert.hpp>
45 #include <map>
46 #include <set>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
49 #include <stdio.h>
50 #include <signal.h>
51 #include <stdlib.h>
52 #include "misc.hh"
53 #include "mtasker.hh"
54 #include <utility>
55 #include "arguments.hh"
56 #include "syncres.hh"
57 #include <fcntl.h>
58 #include <fstream>
59 #include "sortlist.hh"
60 #include "sstuff.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
66 #ifdef MALLOC_TRACE
67 #include "malloctrace.hh"
68 #endif
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
76 #include "logger.hh"
77 #include "iputils.hh"
78 #include "mplexer.hh"
79 #include "config.h"
80 #include "lua-recursor4.hh"
81 #include "version.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
84 #include "dnsname.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
90 #include "gettime.hh"
91 #include "pubsuffix.hh"
92 #ifdef NOD_ENABLED
93 #include "nod.hh"
94 #endif /* NOD_ENABLED */
95
96 #include "rec-protobuf.hh"
97 #include "rec-snmp.hh"
98
99 #ifdef HAVE_SYSTEMD
100 #include <systemd/sd-daemon.h>
101 #endif
102
103 #include "namespaces.hh"
104
105 #ifdef HAVE_PROTOBUF
106 #include "uuid-utils.hh"
107 #endif /* HAVE_PROTOBUF */
108
109 #include "xpf.hh"
110
111 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
113 static thread_local std::shared_ptr<RecursorLua4> t_pdl;
114 static thread_local unsigned int t_id = 0;
115 static thread_local std::shared_ptr<Regex> t_traceRegex;
116 static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
117 #ifdef HAVE_PROTOBUF
118 static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
119 static thread_local uint64_t t_protobufServersGeneration;
120 static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
121 static thread_local uint64_t t_outgoingProtobufServersGeneration;
122 #endif /* HAVE_PROTOBUF */
123
124 #ifdef HAVE_FSTRM
125 static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
126 static thread_local uint64_t t_frameStreamServersGeneration;
127 #endif /* HAVE_FSTRM */
128
129 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
130 thread_local std::unique_ptr<MemRecursorCache> t_RC;
131 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
132 thread_local FDMultiplexer* t_fdm{nullptr};
133 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
134 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
135 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
136 #ifdef NOD_ENABLED
137 thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
138 thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
139 #endif /* NOD_ENABLED */
140 __thread struct timeval g_now; // timestamp, updated (too) frequently
141
142 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
143
144 // for communicating with our threads
145 // effectively readonly after startup
146 struct RecThreadInfo
147 {
148 struct ThreadPipeSet
149 {
150 int writeToThread{-1};
151 int readToThread{-1};
152 int writeFromThread{-1};
153 int readFromThread{-1};
154 int writeQueriesToThread{-1}; // this one is non-blocking
155 int readQueriesToThread{-1};
156 };
157
158 /* FD corresponding to TCP sockets this thread is listening
159 on.
160 These FDs are also in deferredAdds when we have one
161 socket per listener, and in g_deferredAdds instead. */
162 std::set<int> tcpSockets;
163 /* FD corresponding to listening sockets if we have one socket per
164 listener (with reuseport), otherwise all listeners share the
165 same FD and g_deferredAdds is then used instead */
166 deferredAdd_t deferredAdds;
167 struct ThreadPipeSet pipes;
168 std::thread thread;
169 MT_t* mt{nullptr};
170 uint64_t numberOfDistributedQueries{0};
171 /* handle the web server, carbon, statistics and the control channel */
172 bool isHandler{false};
173 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
174 bool isListener{false};
175 /* process queries */
176 bool isWorker{false};
177 };
178
179 /* first we have the handler thread, t_id == 0 (some other
180 helper threads like SNMP might have t_id == 0 as well)
181 then the distributor threads if any
182 and finally the workers */
183 static std::vector<RecThreadInfo> s_threadInfos;
184 /* without reuseport, all listeners share the same sockets */
185 static deferredAdd_t g_deferredAdds;
186
187 typedef vector<int> tcpListenSockets_t;
188 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
189
190 static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
191 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
192 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
193 static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
194 static AtomicCounter counter;
195 static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
196 static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
197 static NetmaskGroup g_XPFAcl;
198 static size_t g_tcpMaxQueriesPerConn;
199 static size_t s_maxUDPQueriesPerRound;
200 static uint64_t g_latencyStatSize;
201 static uint32_t g_disthashseed;
202 static unsigned int g_maxTCPPerClient;
203 static unsigned int g_maxMThreads;
204 static unsigned int g_numDistributorThreads;
205 static unsigned int g_numWorkerThreads;
206 static int g_tcpTimeout;
207 static uint16_t g_udpTruncationThreshold;
208 static uint16_t g_xpfRRCode{0};
209 static std::atomic<bool> statsWanted;
210 static std::atomic<bool> g_quiet;
211 static bool g_logCommonErrors;
212 static bool g_anyToTcp;
213 static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
214 static bool g_reusePort{false};
215 static bool g_gettagNeedsEDNSOptions{false};
216 static time_t g_statisticsInterval;
217 static bool g_useIncomingECS;
218 static bool g_useKernelTimestamp;
219 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
220 #ifdef NOD_ENABLED
221 static bool g_nodEnabled;
222 static DNSName g_nodLookupDomain;
223 static bool g_nodLog;
224 static SuffixMatchNode g_nodDomainWL;
225 static std::string g_nod_pbtag;
226 static bool g_udrEnabled;
227 static bool g_udrLog;
228 static std::string g_udr_pbtag;
229 #endif /* NOD_ENABLED */
230 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
231 static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
232 #else
233 static std::set<uint16_t> s_avoidUdpSourcePorts;
234 #endif
235 static uint16_t s_minUdpSourcePort;
236 static uint16_t s_maxUdpSourcePort;
237 static double s_balancingFactor;
238
239 RecursorControlChannel s_rcc; // only active in the handler thread
240 RecursorStats g_stats;
241 string s_programname="pdns_recursor";
242 string s_pidfname;
243 bool g_lowercaseOutgoing;
244 unsigned int g_networkTimeoutMsec;
245 unsigned int g_numThreads;
246 uint16_t g_outgoingEDNSBufsize;
247 bool g_logRPZChanges{false};
248
249 // Used in the Syncres to not throttle certain servers
250 GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
251 GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
252
// Local networks, as a comma separated netmask list. The adjacent string
// literals are concatenated by the compiler into a single string.
#define LOCAL_NETS \
  "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, " \
  "192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// The same networks, each one prefixed with '!'
#define LOCAL_NETS_INVERSE \
  "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, " \
  "!192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS \
  "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, " \
  "240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Local networks followed by the bad networks
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
262
263 //! used to send information to a newborn mthread
264 struct DNSComboWriter {
265 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
266 {
267 }
268
269 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data, std::vector<DNSRecord>&& records): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_records(std::move(records)), d_data(std::move(data))
270 {
271 }
272
273 void setRemote(const ComboAddress& sa)
274 {
275 d_remote=sa;
276 }
277
278 void setSource(const ComboAddress& sa)
279 {
280 d_source=sa;
281 }
282
283 void setLocal(const ComboAddress& sa)
284 {
285 d_local=sa;
286 }
287
288 void setDestination(const ComboAddress& sa)
289 {
290 d_destination=sa;
291 }
292
293 void setSocket(int sock)
294 {
295 d_socket=sock;
296 }
297
298 string getRemote() const
299 {
300 if (d_source == d_remote) {
301 return d_source.toStringWithPort();
302 }
303 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
304 }
305
306 MOADNSParser d_mdp;
307 struct timeval d_now;
308 /* Remote client, might differ from d_source
309 in case of XPF, in which case d_source holds
310 the IP of the client and d_remote of the proxy
311 */
312 ComboAddress d_remote;
313 ComboAddress d_source;
314 /* Destination address, might differ from
315 d_destination in case of XPF, in which case
316 d_destination holds the IP of the proxy and
317 d_local holds our own. */
318 ComboAddress d_local;
319 ComboAddress d_destination;
320 #ifdef HAVE_PROTOBUF
321 boost::uuids::uuid d_uuid;
322 string d_requestorId;
323 string d_deviceId;
324 string d_deviceName;
325 struct timeval d_kernelTimestamp{0,0};
326 #endif
327 std::string d_query;
328 std::vector<std::string> d_policyTags;
329 std::vector<DNSRecord> d_records;
330 LuaContext::LuaObject d_data;
331 EDNSSubnetOpts d_ednssubnet;
332 shared_ptr<TCPConnection> d_tcpConnection;
333 boost::optional<int> d_rcode{boost::none};
334 int d_socket{-1};
335 unsigned int d_tag{0};
336 uint32_t d_qhash{0};
337 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
338 uint16_t d_ecsBegin{0};
339 uint16_t d_ecsEnd{0};
340 bool d_variable{false};
341 bool d_ecsFound{false};
342 bool d_ecsParsed{false};
343 bool d_followCNAMERecords{false};
344 bool d_logResponse{false};
345 bool d_tcp{false};
346 };
347
348 MT_t* getMT()
349 {
350 return MT ? MT.get() : nullptr;
351 }
352
353 ArgvMap &arg()
354 {
355 static ArgvMap theArg;
356 return theArg;
357 }
358
359 unsigned int getRecursorThreadId()
360 {
361 return t_id;
362 }
363
364 int getMTaskerTID()
365 {
366 return MT->getTid();
367 }
368
369 static bool isDistributorThread()
370 {
371 if (t_id == 0) {
372 return false;
373 }
374
375 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
376 }
377
378 static bool isHandlerThread()
379 {
380 if (t_id == 0) {
381 return true;
382 }
383
384 return s_threadInfos.at(t_id).isHandler;
385 }
386
387 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
388
389 // -1 is error, 0 is timeout, 1 is success
390 int asendtcp(const string& data, Socket* sock)
391 {
392 PacketID pident;
393 pident.sock=sock;
394 pident.outMSG=data;
395
396 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
397 string packet;
398
399 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
400
401 if(!ret || ret==-1) { // timeout
402 t_fdm->removeWriteFD(sock->getHandle());
403 }
404 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
405 return -1;
406 }
407 return ret;
408 }
409
410 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
411
412 // -1 is error, 0 is timeout, 1 is success
413 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
414 {
415 data.clear();
416 PacketID pident;
417 pident.sock=sock;
418 pident.inNeeded=len;
419 pident.inIncompleteOkay=incompleteOkay;
420 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
421
422 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
423 if(!ret || ret==-1) { // timeout
424 t_fdm->removeReadFD(sock->getHandle());
425 }
426 else if(data.empty()) {// error, EOF or other
427 return -1;
428 }
429
430 return ret;
431 }
432
433 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
434 {
435 PacketID pident=*any_cast<PacketID>(&var);
436 char resp[512];
437 ComboAddress fromaddr;
438 socklen_t addrlen=sizeof(fromaddr);
439
440 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
441 if (fromaddr != pident.remote) {
442 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
443
444 }
445
446 t_fdm->removeReadFD(fd);
447 if(ret >= 0) {
448 string data(resp, (size_t) ret);
449 MT->sendEvent(pident, &data);
450 }
451 else {
452 string empty;
453 MT->sendEvent(pident, &empty);
454 // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
455 }
456 }
457 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
458 {
459 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
460 s.setNonBlocking();
461 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
462
463 s.bind(local);
464 s.connect(dest);
465 s.send(query);
466
467 PacketID pident;
468 pident.sock=&s;
469 pident.remote=dest;
470 pident.type=0;
471 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
472
473 string data;
474
475 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
476
477 if(!ret || ret==-1) { // timeout
478 t_fdm->removeReadFD(s.getHandle());
479 }
480 else if(data.empty()) {// error, EOF or other
481 // we could special case this
482 return data;
483 }
484 return data;
485 }
486
487 //! pick a random query local address
488 ComboAddress getQueryLocalAddress(int family, uint16_t port)
489 {
490 ComboAddress ret;
491 if(family==AF_INET) {
492 if(g_localQueryAddresses4.empty())
493 ret = g_local4;
494 else
495 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
496 ret.sin4.sin_port = htons(port);
497 }
498 else {
499 if(g_localQueryAddresses6.empty())
500 ret = g_local6;
501 else
502 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
503
504 ret.sin6.sin6_port = htons(port);
505 }
506 return ret;
507 }
508
509 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
510
511 static void setSocketBuffer(int fd, int optname, uint32_t size)
512 {
513 uint32_t psize=0;
514 socklen_t len=sizeof(psize);
515
516 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
517 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
518 return;
519 }
520
521 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0) {
522 int err = errno;
523 g_log << Logger::Error << "Unable to raise socket buffer size to " << size << ": " << stringerror(err) << endl;
524 }
525 }
526
527
528 static void setSocketReceiveBuffer(int fd, uint32_t size)
529 {
530 setSocketBuffer(fd, SO_RCVBUF, size);
531 }
532
533 static void setSocketSendBuffer(int fd, uint32_t size)
534 {
535 setSocketBuffer(fd, SO_SNDBUF, size);
536 }
537
538
539 // you can ask this class for a UDP socket to send a query from
540 // this socket is not yours, don't even think about deleting it
541 // but after you call 'returnSocket' on it, don't assume anything anymore
542 class UDPClientSocks
543 {
544 unsigned int d_numsocks;
545 public:
546 UDPClientSocks() : d_numsocks(0)
547 {
548 }
549
550 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
551 int getSocket(const ComboAddress& toaddr, int* fd)
552 {
553 *fd=makeClientSocket(toaddr.sin4.sin_family);
554 if(*fd < 0) // temporary error - receive exception otherwise
555 return -2;
556
557 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
558 int err = errno;
559 try {
560 closesocket(*fd);
561 }
562 catch(const PDNSException& e) {
563 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
564 }
565
566 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
567 return -2;
568 return -1;
569 }
570
571 d_numsocks++;
572 return 0;
573 }
574
575 // return a socket to the pool, or simply erase it
576 void returnSocket(int fd)
577 {
578 try {
579 t_fdm->removeReadFD(fd);
580 }
581 catch(const FDMultiplexerException& e) {
582 // we sometimes return a socket that has not yet been assigned to t_fdm
583 }
584
585 try {
586 closesocket(fd);
587 }
588 catch(const PDNSException& e) {
589 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
590 }
591
592 --d_numsocks;
593 }
594
595 private:
596
597 // returns -1 for errors which might go away, throws for ones that won't
598 static int makeClientSocket(int family)
599 {
600 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
601
602 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
603 return ret;
604
605 if(ret<0)
606 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
607
608 // setCloseOnExec(ret); // we're not going to exec
609
610 int tries=10;
611 ComboAddress sin;
612 while(--tries) {
613 uint16_t port;
614
615 if(tries==1) // fall back to kernel 'random'
616 port = 0;
617 else {
618 do {
619 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
620 }
621 while (s_avoidUdpSourcePorts.count(port));
622 }
623
624 sin=getQueryLocalAddress(family, port); // does htons for us
625
626 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
627 break;
628 }
629
630 if(!tries) {
631 closesocket(ret);
632 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
633 }
634
635 try {
636 setReceiveSocketErrors(ret, family);
637 setNonBlocking(ret);
638 }
639 catch(...) {
640 closesocket(ret);
641 throw;
642 }
643
644 return ret;
645 }
646 };
647
648 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
649
650 /* these two functions are used by LWRes */
651 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
652 int asendto(const char *data, size_t len, int flags,
653 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
654 {
655
656 PacketID pident;
657 pident.domain = domain;
658 pident.remote = toaddr;
659 pident.type = qtype;
660
661 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
662 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
663
664 for(; chain.first != chain.second; chain.first++) {
665 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
666 /*
667 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
668 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
669 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
670 */
671 chain.first->key.chain.insert(id); // we can chain
672 *fd=-1; // gets used in waitEvent / sendEvent later on
673 return 1;
674 }
675 }
676
677 int ret=t_udpclientsocks->getSocket(toaddr, fd);
678 if(ret < 0)
679 return ret;
680
681 pident.fd=*fd;
682 pident.id=id;
683
684 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
685 ret = send(*fd, data, len, 0);
686
687 int tmp = errno;
688
689 if(ret < 0)
690 t_udpclientsocks->returnSocket(*fd);
691
692 errno = tmp; // this is for logging purposes only
693 return ret;
694 }
695
696 // -1 is error, 0 is timeout, 1 is success
697 int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
698 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
699 {
700 static optional<unsigned int> nearMissLimit;
701 if(!nearMissLimit)
702 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
703
704 PacketID pident;
705 pident.fd=fd;
706 pident.id=id;
707 pident.domain=domain;
708 pident.type = qtype;
709 pident.remote=fromaddr;
710
711 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
712
713 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
714 if(ret > 0) {
715 /* handleUDPServerResponse() will close the socket for us no matter what */
716 if(packet.empty()) // means "error"
717 return -1;
718
719 *d_len=packet.size();
720
721 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
722 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
723 g_stats.spoofCount++;
724 return -1;
725 }
726 }
727 else {
728 /* getting there means error or timeout, it's up to us to close the socket */
729 if(fd >= 0)
730 t_udpclientsocks->returnSocket(fd);
731 }
732 return ret;
733 }
734
735 static void writePid(void)
736 {
737 if(!::arg().mustDo("write-pid"))
738 return;
739 ofstream of(s_pidfname.c_str(), std::ios_base::app);
740 if(of)
741 of<< Utility::getpid() <<endl;
742 else {
743 int err = errno;
744 g_log << Logger::Error << "Writing pid for " << Utility::getpid() << " to " << s_pidfname << " failed: "
745 << stringerror(err) << endl;
746 }
747 }
748
749 uint16_t TCPConnection::s_maxInFlight;
750
751 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
752 {
753 ++s_currentConnections;
754 (*t_tcpClientCounts)[d_remote]++;
755 }
756
757 TCPConnection::~TCPConnection()
758 {
759 try {
760 if(closesocket(d_fd) < 0)
761 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
762 }
763 catch(const PDNSException& e) {
764 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
765 }
766
767 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
768 t_tcpClientCounts->erase(d_remote);
769 --s_currentConnections;
770 }
771
772 AtomicCounter TCPConnection::s_currentConnections;
773
774 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
775
776 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
777 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
778 {
779 if(packetsize > 1000 && t_largeanswerremotes)
780 t_largeanswerremotes->push_back(remote);
781 switch(res) {
782 case RCode::ServFail:
783 if(t_servfailremotes) {
784 t_servfailremotes->push_back(remote);
785 if(query && t_servfailqueryring) // packet cache
786 t_servfailqueryring->push_back(make_pair(*query, qtype));
787 }
788 g_stats.servFails++;
789 break;
790 case RCode::NXDomain:
791 g_stats.nxDomains++;
792 break;
793 case RCode::NoError:
794 g_stats.noErrors++;
795 break;
796 }
797 }
798
799 static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
800 try
801 {
802 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
803 }
804 catch(...)
805 {
806 return "Exception making error message for exception";
807 }
808
809 #ifdef HAVE_PROTOBUF
810 static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId, const std::string& deviceName)
811 {
812 if (!t_protobufServers) {
813 return;
814 }
815
816 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
817 const ComboAddress requestor = requestorNM.getMaskedNetwork();
818 requestor.setPort(remote.getPort());
819 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
820 message.setServerIdentity(SyncRes::s_serverID);
821 message.setEDNSSubnet(ednssubnet, ednssubnet.isIPv4() ? maskV4 : maskV6);
822 message.setRequestorId(requestorId);
823 message.setDeviceId(deviceId);
824 message.setDeviceName(deviceName);
825
826 if (!policyTags.empty()) {
827 message.setPolicyTags(policyTags);
828 }
829
830 // cerr <<message.toDebugString()<<endl;
831 std::string str;
832 message.serialize(str);
833
834 for (auto& server : *t_protobufServers) {
835 server->queueData(str);
836 }
837 }
838
839 static void protobufLogResponse(const RecProtoBufMessage& message)
840 {
841 if (!t_protobufServers) {
842 return;
843 }
844
845 // cerr <<message.toDebugString()<<endl;
846 std::string str;
847 message.serialize(str);
848
849 for (auto& server : *t_protobufServers) {
850 server->queueData(str);
851 }
852 }
853 #endif
854
855 /**
856 * Chases the CNAME provided by the PolicyCustom RPZ policy.
857 *
858 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
859 * @param qtype: The QType of the original query
860 * @param sr: A SyncRes
861 * @param res: An integer that will contain the RCODE of the lookup we do
862 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
863 */
864 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
865 {
866 if (spoofed.d_type == QType::CNAME) {
867 bool oldWantsRPZ = sr.getWantsRPZ();
868 sr.setWantsRPZ(false);
869 vector<DNSRecord> ans;
870 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
871 for (const auto& rec : ans) {
872 if(rec.d_place == DNSResourceRecord::ANSWER) {
873 ret.push_back(rec);
874 }
875 }
876 // Reset the RPZ state of the SyncRes
877 sr.setWantsRPZ(oldWantsRPZ);
878 }
879 }
880
881 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
882 {
883 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
884
885 if(rec.d_type != QType::OPT) // their TTL ain't real
886 minTTL = min(minTTL, rec.d_ttl);
887
888 rec.d_content->toPacket(pw);
889 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
890 pw.rollback();
891 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
892 pw.getHeader()->tc=1;
893 pw.truncate();
894 }
895 return false;
896 }
897
898 return true;
899 }
900
901 #ifdef HAVE_PROTOBUF
902 static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
903 {
904 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
905
906 for (const auto& server : config.servers) {
907 try {
908 auto logger = make_unique<RemoteLogger>(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
909 logger->setLogQueries(config.logQueries);
910 logger->setLogResponses(config.logResponses);
911 result->emplace_back(std::move(logger));
912 }
913 catch(const std::exception& e) {
914 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
915 }
916 catch(const PDNSException& e) {
917 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
918 }
919 }
920
921 return result;
922 }
923
924 static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
925 {
926 if (!luaconfsLocal->protobufExportConfig.enabled) {
927 if (t_protobufServers) {
928 for (auto& server : *t_protobufServers) {
929 server->stop();
930 }
931 t_protobufServers.reset();
932 }
933
934 return false;
935 }
936
937 /* if the server was not running, or if it was running according to a
938 previous configuration */
939 if (!t_protobufServers ||
940 t_protobufServersGeneration < luaconfsLocal->generation) {
941
942 if (t_protobufServers) {
943 for (auto& server : *t_protobufServers) {
944 server->stop();
945 }
946 }
947 t_protobufServers.reset();
948
949 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
950 t_protobufServersGeneration = luaconfsLocal->generation;
951 }
952
953 return true;
954 }
955
956 static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
957 {
958 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
959 if (t_outgoingProtobufServers) {
960 for (auto& server : *t_outgoingProtobufServers) {
961 server->stop();
962 }
963 }
964 t_outgoingProtobufServers.reset();
965
966 return false;
967 }
968
969 /* if the server was not running, or if it was running according to a
970 previous configuration */
971 if (!t_outgoingProtobufServers ||
972 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
973
974 if (t_outgoingProtobufServers) {
975 for (auto& server : *t_outgoingProtobufServers) {
976 server->stop();
977 }
978 }
979 t_outgoingProtobufServers.reset();
980
981 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
982 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
983 }
984
985 return true;
986 }
987
988 #ifdef HAVE_FSTRM
989
990 static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
991 {
992 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
993
994 for (const auto& server : config.servers) {
995 try {
996 std::unordered_map<string,unsigned> options;
997 options["bufferHint"] = config.bufferHint;
998 options["flushTimeout"] = config.flushTimeout;
999 options["inputQueueSize"] = config.inputQueueSize;
1000 options["outputQueueSize"] = config.outputQueueSize;
1001 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
1002 options["reopenInterval"] = config.reopenInterval;
1003 FrameStreamLogger *fsl = nullptr;
1004 try {
1005 ComboAddress address(server);
1006 fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
1007 }
1008 catch (const PDNSException& e) {
1009 fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
1010 }
1011 fsl->setLogQueries(config.logQueries);
1012 fsl->setLogResponses(config.logResponses);
1013 result->emplace_back(fsl);
1014 }
1015 catch(const std::exception& e) {
1016 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
1017 }
1018 catch(const PDNSException& e) {
1019 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
1020 }
1021 }
1022
1023 return result;
1024 }
1025
1026 static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1027 {
1028 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1029 if (t_frameStreamServers) {
1030 // dt's take care of cleanup
1031 t_frameStreamServers.reset();
1032 }
1033
1034 return false;
1035 }
1036
1037 /* if the server was not running, or if it was running according to a
1038 previous configuration */
1039 if (!t_frameStreamServers ||
1040 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1041
1042 if (t_frameStreamServers) {
1043 // dt's take care of cleanup
1044 t_frameStreamServers.reset();
1045 }
1046
1047 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1048 t_frameStreamServersGeneration = luaconfsLocal->generation;
1049 }
1050
1051 return true;
1052 }
1053 #endif /* HAVE_FSTRM */
1054 #endif /* HAVE_PROTOBUF */
1055
1056 #ifdef NOD_ENABLED
1057 static bool nodCheckNewDomain(const DNSName& dname)
1058 {
1059 static const QType qt(QType::A);
1060 static const uint16_t qc(QClass::IN);
1061 bool ret = false;
1062 // First check the (sub)domain isn't whitelisted for NOD purposes
1063 if (!g_nodDomainWL.check(dname)) {
1064 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
1065 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1066 if (g_nodLog) {
1067 // This should probably log to a dedicated log file
1068 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
1069 }
1070 if (!(g_nodLookupDomain.isRoot())) {
1071 // Send a DNS A query to <domain>.g_nodLookupDomain
1072 DNSName qname = dname;
1073 vector<DNSRecord> dummy;
1074 qname += g_nodLookupDomain;
1075 directResolve(qname, qt, qc, dummy);
1076 }
1077 ret = true;
1078 }
1079 }
1080 return ret;
1081 }
1082
1083 static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1084 {
1085 bool ret = false;
1086 if (record.d_place == DNSResourceRecord::ANSWER ||
1087 record.d_place == DNSResourceRecord::ADDITIONAL) {
1088 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1089 std::stringstream ss;
1090 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1091 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
1092 if (g_udrLog) {
1093 // This should also probably log to a dedicated file.
1094 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
1095 }
1096 ret = true;
1097 }
1098 }
1099 return ret;
1100 }
1101 #endif /* NOD_ENABLED */
1102
1103 int followCNAMERecords(vector<DNSRecord>& ret, const QType& qtype)
1104 {
1105 vector<DNSRecord> resolved;
1106 DNSName target;
1107 for(const DNSRecord& rr : ret) {
1108 if(rr.d_type == QType::CNAME) {
1109 auto rec = getRR<CNAMERecordContent>(rr);
1110 if(rec) {
1111 target=rec->getTarget();
1112 break;
1113 }
1114 }
1115 }
1116
1117 if(target.empty()) {
1118 return 0;
1119 }
1120
1121 int rcode = directResolve(target, qtype, QClass::IN, resolved);
1122
1123 for(DNSRecord& rr : resolved) {
1124 ret.push_back(std::move(rr));
1125 }
1126 return rcode;
1127 }
1128
/* Resolves one client query and sends the reply.

   p points to a DNSComboWriter describing the query; ownership of it is
   taken here. The function
     - parses the EDNS options of the query (payload size, ECS, NSID),
     - prepares protobuf / dnstap export when enabled,
     - runs the Lua hooks and the RPZ policy checks,
     - resolves through SyncRes unless an answer was already provided,
     - applies the DNSSEC validation state to the reply,
     - builds the packet and sends it over UDP or TCP,
     - updates the statistics.
   Exceptions are caught and logged at the bottom of the function; none of
   them propagates to the caller. */
1129 static void startDoResolve(void *p)
1130 {
// Take ownership of the DNSComboWriter so that it is released on every exit path.
1131 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
1132 try {
1133 if (t_queryring)
1134 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1135
// Size budget for the answer: 65535 over TCP; over UDP 512 capped by
// g_udpTruncationThreshold (raised below when EDNS advertises a larger buffer).
1136 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
1137 EDNSOpts edo;
1138 std::vector<pair<uint16_t, string> > ednsOpts;
1139 bool variableAnswer = dc->d_variable;
1140 bool haveEDNS=false;
1141 #ifdef NOD_ENABLED
1142 bool hasUDR = false;
1143 #endif /* NOD_ENABLED */
1144 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1145 uint8_t ednsExtRCode = 0;
1146 if(getEDNSOpts(dc->d_mdp, &edo)) {
1147 haveEDNS=true;
// Any EDNS version other than 0 yields a BADVERS extended rcode; it is acted
// upon further down, once the Lua question object has been set up.
1148 if (edo.d_version != 0) {
1149 ednsExtRCode = ERCode::BADVERS;
1150 }
1151
1152 if(!dc->d_tcp) {
1153 /* rfc6891 6.2.3:
1154 "Values lower than 512 MUST be treated as equal to 512."
1155 */
1156 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1157 }
1158 ednsOpts = edo.d_options;
1159 maxanswersize -= 11; // EDNS header size
1160
1161 for (const auto& o : edo.d_options) {
1162 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1163 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1164 } else if (o.first == EDNSOptionCode::NSID) {
1165 const static string mode_server_id = ::arg()["server-id"];
1166 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1167 maxanswersize > (2 + 2 + mode_server_id.size())) {
1168 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1169 variableAnswer = true; // Can't packetcache an answer with NSID
1170 // Option Code and Option Length are both 2
1171 maxanswersize -= 2 + 2 + mode_server_id.size();
1172 }
1173 }
1174 }
1175 }
1176 /* perhaps there was no EDNS or no ECS but by now we looked */
1177 dc->d_ecsParsed = true;
1178 vector<DNSRecord> ret;
1179 vector<uint8_t> packet;
1180
1181 auto luaconfsLocal = g_luaconfs.getLocal();
1182 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1183 bool wantsRPZ(true);
1184 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
1185 #ifdef HAVE_PROTOBUF
// pbMessage is only filled in when protobuf export is enabled; the requestor
// address is masked with the configured IPv4/IPv6 mask before being exported.
1186 if (checkProtobufExport(luaconfsLocal)) {
1187 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1188 ComboAddress requestor = requestorNM.getMaskedNetwork();
1189 requestor.setPort(dc->d_source.getPort());
1190 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
1191 pbMessage->setServerIdentity(SyncRes::s_serverID);
1192 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1193 }
1194 #endif /* HAVE_PROTOBUF */
1195
1196 #ifdef HAVE_FSTRM
1197 checkFrameStreamExport(luaconfsLocal);
1198 #endif
1199
1200 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
1201
// Response header: a non-authoritative response with recursion available,
// echoing the ID, RD and CD bits of the query.
1202 pw.getHeader()->aa=0;
1203 pw.getHeader()->ra=1;
1204 pw.getHeader()->qr=1;
1205 pw.getHeader()->tc=0;
1206 pw.getHeader()->id=dc->d_mdp.d_header.id;
1207 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
1208 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
1209
1210 /* This is the lowest TTL seen in the records of the response,
1211 so we can't cache it for longer than this value.
1212 If we have a TTL cap, this value can't be larger than the
1213 cap no matter what. */
1214 uint32_t minTTL = dc->d_ttlCap;
1215
1216 SyncRes sr(dc->d_now);
1217 sr.setId(MT->getTid());
1218
1219 bool DNSSECOK=false;
1220 if(t_pdl) {
1221 sr.setLuaEngine(t_pdl);
1222 }
1223 if(g_dnssecmode != DNSSECMode::Off) {
1224 sr.setDoDNSSEC(true);
1225
1226 // Does the requestor want DNSSEC records?
1227 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
1228 DNSSECOK=true;
1229 g_stats.dnssecQueries++;
1230 }
1231 if (dc->d_mdp.d_header.cd) {
1232 /* Per rfc6840 section 5.9, "When processing a request with
1233 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1234 to return all response data, even data that has failed DNSSEC
1235 validation. */
1236 ++g_stats.dnssecCheckDisabledQueries;
1237 }
1238 if (dc->d_mdp.d_header.ad) {
1239 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1240 indicating that the requester understands and is interested in the
1241 value of the AD bit in the response. This allows a requester to
1242 indicate that it understands the AD bit without also requesting
1243 DNSSEC data via the DO bit. */
1244 ++g_stats.dnssecAuthenticDataQueries;
1245 }
1246 } else {
1247 // Ignore the client-set CD flag
1248 pw.getHeader()->cd=0;
1249 }
// Validation is requested in the ValidateAll and ValidateForLog modes, and in
// Process mode when the client set the AD bit or the DO flag.
1250 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1251
1252 #ifdef HAVE_PROTOBUF
1253 sr.setInitialRequestId(dc->d_uuid);
1254 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
1255 #endif
1256 #ifdef HAVE_FSTRM
1257 sr.setFrameStreamServers(t_frameStreamServers);
1258 #endif
1259 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
1260
1261 bool tracedQuery=false; // we could consider letting Lua know about this too
1262 bool shouldNotValidate = false;
1263
1264 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1265 int res = RCode::NoError;
1266
// Question object handed to the Lua hooks. The pointer members below refer to
// locals of this function (edo, ednsOpts, appliedPolicy, ret) and to members
// of sr and dc.
1267 DNSFilterEngine::Policy appliedPolicy;
1268 std::vector<DNSRecord> spoofed;
1269 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, dc->d_logResponse);
1270 dq.ednsFlags = &edo.d_extFlags;
1271 dq.ednsOptions = &ednsOpts;
1272 dq.tag = dc->d_tag;
1273 dq.discardedPolicies = &sr.d_discardedPolicies;
1274 dq.policyTags = &dc->d_policyTags;
1275 dq.appliedPolicy = &appliedPolicy;
1276 dq.currentRecords = &ret;
1277 dq.dh = &dc->d_mdp.d_header;
1278 dq.data = dc->d_data;
1279 #ifdef HAVE_PROTOBUF
1280 dq.requestorId = dc->d_requestorId;
1281 dq.deviceId = dc->d_deviceId;
1282 dq.deviceName = dc->d_deviceName;
1283 #endif
1284
// A non-zero extended rcode (BADVERS, set above): skip resolution entirely and
// go build the reply.
1285 if(ednsExtRCode != 0) {
1286 goto sendit;
1287 }
1288
// ANY over UDP with g_anyToTcp set: answer with TC=1 and no records. The
// answer is flagged variable, which keeps it out of the packet cache below.
1289 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
1290 pw.getHeader()->tc = 1;
1291 res = 0;
1292 variableAnswer = true;
1293 goto sendit;
1294 }
1295
// Queries whose name matches the trace regex have their SyncRes log stored;
// the stored trace is printed once the answer is known (see haveAnswer).
1296 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
1297 sr.setLogMode(SyncRes::Store);
1298 tracedQuery=true;
1299 }
1300
1301 if(!g_quiet || tracedQuery) {
1302 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
1303 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
1304 if(!dc->d_ednssubnet.source.empty()) {
1305 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
1306 }
1307 g_log<<endl;
1308 }
1309
// RD bit not set in the query: put SyncRes in cache-only mode.
1310 if(!dc->d_mdp.d_header.rd) {
1311 sr.setCacheOnly();
1312 }
1313
1314 if (dc->d_rcode != boost::none) {
1315 /* we have a response ready to go, most likely from gettag_ffi */
1316 ret = std::move(dc->d_records);
1317 res = *dc->d_rcode;
1318 if (res == RCode::NoError && dc->d_followCNAMERecords) {
1319 res = followCNAMERecords(ret, QType(dc->d_mdp.d_qtype));
1320 }
1321 goto haveAnswer;
1322 }
1323
1324 if (t_pdl) {
1325 t_pdl->prerpz(dq, res);
1326 }
1327
1328 // Check if the query has a policy attached to it
1329 if (wantsRPZ && appliedPolicy.d_type == DNSFilterEngine::PolicyType::None) {
1330 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
1331 }
1332
1333 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1334 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
1335
1336 sr.setWantsRPZ(wantsRPZ);
// Apply the policy selected before resolution: either set by a Lua hook
// through dq.appliedPolicy or found by getQueryPolicy() above.
1337 if(wantsRPZ) {
1338 switch(appliedPolicy.d_kind) {
1339 case DNSFilterEngine::PolicyKind::NoAction:
1340 break;
1341 case DNSFilterEngine::PolicyKind::Drop:
// Drop: return without sending anything.
// NOTE(review): this return (like the other policy Drop returns below) also
// bypasses the TCP in-flight accounting done after a reply is written -
// confirm that this is intended for dropped TCP queries.
1342 g_stats.policyDrops++;
1343 g_stats.policyResults[appliedPolicy.d_kind]++;
1344 return;
1345 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1346 g_stats.policyResults[appliedPolicy.d_kind]++;
1347 res=RCode::NXDomain;
1348 goto haveAnswer;
1349 case DNSFilterEngine::PolicyKind::NODATA:
1350 g_stats.policyResults[appliedPolicy.d_kind]++;
1351 res=RCode::NoError;
1352 goto haveAnswer;
1353 case DNSFilterEngine::PolicyKind::Custom:
1354 g_stats.policyResults[appliedPolicy.d_kind]++;
1355 res=RCode::NoError;
1356 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1357 for (const auto& dr : spoofed) {
1358 ret.push_back(dr);
1359 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1360 }
1361 goto haveAnswer;
1362 case DNSFilterEngine::PolicyKind::Truncate:
// Truncate is only acted upon for UDP; over TCP resolution proceeds normally.
1363 if(!dc->d_tcp) {
1364 g_stats.policyResults[appliedPolicy.d_kind]++;
1365 res=RCode::NoError;
1366 pw.getHeader()->tc=1;
1367 goto haveAnswer;
1368 }
1369 break;
1370 }
1371 }
1372
1373 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
1374 try {
1375 sr.d_appliedPolicy = appliedPolicy;
1376 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
1377 shouldNotValidate = sr.wasOutOfBand();
1378 }
1379 catch(const ImmediateServFailException &e) {
1380 if(g_logCommonErrors) {
1381 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
1382 }
1383 res = RCode::ServFail;
1384 }
1385 catch(const PolicyHitException& e) {
// -2 marks "a policy was hit during resolution"; it is handled just below.
1386 res = -2;
1387 }
1388 dq.validationState = sr.getValidationState();
1389
1390 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1391 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1392 appliedPolicy = sr.d_appliedPolicy;
1393 g_stats.policyResults[appliedPolicy.d_kind]++;
1394 switch(appliedPolicy.d_kind) {
1395 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1396 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1397 case DNSFilterEngine::PolicyKind::Drop:
1398 g_stats.policyDrops++;
1399 return;
1400 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1401 ret.clear();
1402 res=RCode::NXDomain;
1403 goto haveAnswer;
1404
1405 case DNSFilterEngine::PolicyKind::NODATA:
1406 ret.clear();
1407 res=RCode::NoError;
1408 goto haveAnswer;
1409
1410 case DNSFilterEngine::PolicyKind::Truncate:
1411 if(!dc->d_tcp) {
1412 ret.clear();
1413 res=RCode::NoError;
1414 pw.getHeader()->tc=1;
1415 goto haveAnswer;
1416 }
1417 break;
1418
1419 case DNSFilterEngine::PolicyKind::Custom:
1420 ret.clear();
1421 res=RCode::NoError;
1422 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1423 for (const auto& dr : spoofed) {
1424 ret.push_back(dr);
1425 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1426 }
1427 goto haveAnswer;
1428 }
1429 }
1430
// No policy applied so far: check the resolved records against the
// post-resolution policies.
1431 if (wantsRPZ && appliedPolicy.d_type == DNSFilterEngine::PolicyType::None) {
1432 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
1433 }
1434
// Lua hooks: nodata when a NoError answer has no ANSWER record of the queried
// type, nxdomain on NXDomain, then postresolve in all cases. A hook returning
// true disables the validation step further down.
1435 if(t_pdl) {
1436 if(res == RCode::NoError) {
1437 auto i=ret.cbegin();
1438 for(; i!= ret.cend(); ++i)
1439 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1440 break;
1441 if(i == ret.cend() && t_pdl->nodata(dq, res))
1442 shouldNotValidate = true;
1443
1444 }
1445 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
1446 shouldNotValidate = true;
1447
1448 if(t_pdl->postresolve(dq, res))
1449 shouldNotValidate = true;
1450 }
1451
1452 if (wantsRPZ) { //XXX This block is repeated, see above
1453 g_stats.policyResults[appliedPolicy.d_kind]++;
1454 switch(appliedPolicy.d_kind) {
1455 case DNSFilterEngine::PolicyKind::NoAction:
1456 break;
1457 case DNSFilterEngine::PolicyKind::Drop:
1458 g_stats.policyDrops++;
1459 return;
1460 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1461 ret.clear();
1462 res=RCode::NXDomain;
1463 goto haveAnswer;
1464
1465 case DNSFilterEngine::PolicyKind::NODATA:
1466 ret.clear();
1467 res=RCode::NoError;
1468 goto haveAnswer;
1469
1470 case DNSFilterEngine::PolicyKind::Truncate:
1471 if(!dc->d_tcp) {
1472 ret.clear();
1473 res=RCode::NoError;
1474 pw.getHeader()->tc=1;
1475 goto haveAnswer;
1476 }
1477 break;
1478
1479 case DNSFilterEngine::PolicyKind::Custom:
1480 ret.clear();
1481 res=RCode::NoError;
1482 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1483 for (const auto& dr : spoofed) {
1484 ret.push_back(dr);
1485 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1486 }
1487 goto haveAnswer;
1488 }
1489 }
1490 }
// Common exit of the resolution phase: res holds the result code (or one of
// the special values tested below) and ret the records to return.
1491 haveAnswer:;
1492 if(res == PolicyDecision::DROP) {
1493 g_stats.policyDrops++;
1494 return;
1495 }
// Print the stored SyncRes trace for traced queries and for failures.
1496 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1497 {
1498 string trace(sr.getTrace());
1499 if(!trace.empty()) {
1500 vector<string> lines;
1501 boost::split(lines, trace, boost::is_any_of("\n"));
1502 for(const string& line : lines) {
1503 if(!line.empty())
1504 g_log<<Logger::Warning<< line << endl;
1505 }
1506 }
1507 }
1508
// res == -1 is reported to the client as SERVFAIL without any record.
1509 if(res == -1) {
1510 pw.getHeader()->rcode=RCode::ServFail;
1511 // no commit here, because no record
1512 g_stats.servFails++;
1513 }
1514 else {
1515 pw.getHeader()->rcode=res;
1516
1517 // Does the validation mode or query demand validation?
1518 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1519 try {
1520 if(sr.doLog()) {
1521 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
1522 }
1523
1524 auto state = sr.getValidationState();
1525
1526 if(state == Secure) {
1527 if(sr.doLog()) {
1528 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
1529 }
1530
1531 // Is the query source interested in the value of the ad-bit?
1532 if (dc->d_mdp.d_header.ad || DNSSECOK)
1533 pw.getHeader()->ad=1;
1534 }
1535 else if(state == Insecure) {
1536 if(sr.doLog()) {
1537 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
1538 }
1539
1540 pw.getHeader()->ad=0;
1541 }
1542 else if(state == Bogus) {
1543 if(t_bogusremotes)
1544 t_bogusremotes->push_back(dc->d_source);
1545 if(t_bogusqueryring)
1546 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
1547 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1548 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
1549 }
1550
1551 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1552 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1553 if(sr.doLog()) {
1554 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1555 }
1556
1557 pw.getHeader()->rcode=RCode::ServFail;
1558 goto sendit;
1559 } else {
1560 if(sr.doLog()) {
1561 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1562 }
1563 }
1564 }
1565 }
1566 catch(const ImmediateServFailException &e) {
1567 if(g_logCommonErrors)
1568 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1569 pw.getHeader()->rcode=RCode::ServFail;
1570 goto sendit;
1571 }
1572 }
1573
// Order the records; applying a client-specific sortlist makes the answer
// variable, which keeps it out of the packet cache.
1574 if(ret.size()) {
1575 orderAndShuffle(ret);
1576 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
1577 stable_sort(ret.begin(), ret.end(), *sl);
1578 variableAnswer=true;
1579 }
1580 }
1581
1582 bool needCommit = false;
1583 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
// Without the DO flag, leave out NSEC3 records always, and RRSIG/NSEC records
// unless they sit in the ANSWER section and their type was asked for
// explicitly (or the query type is ANY).
1584 if( ! DNSSECOK &&
1585 ( i->d_type == QType::NSEC3 ||
1586 (
1587 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1588 (
1589 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1590 i->d_place != DNSResourceRecord::ANSWER
1591 )
1592 )
1593 )
1594 ) {
1595 continue;
1596 }
1597
// addRecordToPacket() returning false ends the loop and skips the commit below.
1598 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1599 needCommit = false;
1600 break;
1601 }
1602 needCommit = true;
1603
1604 #ifdef NOD_ENABLED
1605 bool udr = false;
1606 if (g_udrEnabled) {
1607 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
1608 if (!hasUDR && udr)
1609 hasUDR = true;
1610 }
1611 #endif /* NOD ENABLED */
1612
1613 #ifdef HAVE_PROTOBUF
1614 if (t_protobufServers) {
1615 #ifdef NOD_ENABLED
1616 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1617 #else
1618 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
1619 #endif /* NOD_ENABLED */
1620 }
1621 #endif
1622 }
1623 if(needCommit)
1624 pw.commit();
1625 }
// Reply finalisation: reached after the record loop above or directly through
// one of the early "goto sendit" exits.
1626 sendit:;
1627
1628 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
1629 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
1630 EDNSSubnetOpts eo;
1631 eo.source = dc->d_ednssubnet.source;
1632 ComboAddress sa;
1633 sa.reset();
1634 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1635 eo.scope = Netmask(sa, 0);
1636
1637 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1638 }
1639
1640 if (haveEDNS) {
1641 /* we try to add the EDNS OPT RR even for truncated answers,
1642 as rfc6891 states:
1643 "The minimal response MUST be the DNS header, question section, and an
1644 OPT record. This MUST also occur when a truncated response (using
1645 the DNS header's TC bit) is returned."
1646 */
1647 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1648 pw.commit();
1649 }
1650
1651 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1652 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1653 #ifdef NOD_ENABLED
1654 bool nod = false;
1655 if (g_nodEnabled) {
1656 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1657 nod = true;
1658 }
1659 #endif /* NOD_ENABLED */
1660 #ifdef HAVE_PROTOBUF
// Fill in the protobuf response message, unless taggedOnly is set and the
// response has neither an applied policy name nor any policy tag.
1661 if (t_protobufServers && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
1662 pbMessage->setBytes(packet.size());
1663 pbMessage->setResponseCode(pw.getHeader()->rcode);
1664 if (appliedPolicy.d_name) {
1665 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1666 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
1667 }
1668 pbMessage->setPolicyTags(dc->d_policyTags);
1669 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1670 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1671 }
1672 else {
1673 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1674 }
1675 pbMessage->setRequestorId(dq.requestorId);
1676 pbMessage->setDeviceId(dq.deviceId);
1677 pbMessage->setDeviceName(dq.deviceName);
1678 #ifdef NOD_ENABLED
1679 if (g_nodEnabled) {
1680 if (nod) {
1681 pbMessage->setNOD(true);
1682 pbMessage->addPolicyTag(g_nod_pbtag);
1683 }
1684 if (hasUDR) {
1685 pbMessage->addPolicyTag(g_udr_pbtag);
1686 }
1687 }
1688 #endif /* NOD_ENABLED */
1689 if (dc->d_logResponse) {
1690 protobufLogResponse(*pbMessage);
1691 }
1692 #ifdef NOD_ENABLED
// Undo the NOD/UDR markers after logging: pbMessage may be moved into the
// packet cache below.
1693 if (g_nodEnabled) {
1694 pbMessage->setNOD(false);
1695 pbMessage->clearUDR();
1696 if (nod)
1697 pbMessage->removePolicyTag(g_nod_pbtag);
1698 if (hasUDR)
1699 pbMessage->removePolicyTag(g_udr_pbtag);
1700 }
1701 #endif /* NOD_ENABLED */
1702 }
1703 #endif
// UDP: send the datagram (with an explicit source address for sockets listed
// in g_fromtosockets), then consider storing the packet in the packet cache.
1704 if(!dc->d_tcp) {
1705 struct msghdr msgh;
1706 struct iovec iov;
1707 cmsgbuf_aligned cbuf;
1708 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1709 msgh.msg_control=NULL;
1710
1711 if(g_fromtosockets.count(dc->d_socket)) {
1712 addCMsgSrcAddr(&msgh, &cbuf, &dc->d_local, 0);
1713 }
1714 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors) {
1715 int err = errno;
1716 g_log << Logger::Warning << "Sending UDP reply to client " << dc->getRemote() << " failed with: "
1717 << strerror(err) << endl;
1718 }
1719
1720 if(variableAnswer || sr.wasVariable()) {
1721 g_stats.variableResponses++;
1722 }
// Only non-variable answers are cached; SERVFAIL responses use their own TTL,
// everything else min(minTTL, s_packetcachettl).
1723 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1724 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1725 string((const char*)&*packet.begin(), packet.size()),
1726 g_now.tv_sec,
1727 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1728 min(minTTL,SyncRes::s_packetcachettl),
1729 dq.validationState,
1730 dc->d_ecsBegin,
1731 dc->d_ecsEnd,
1732 std::move(pbMessage));
1733 }
1734 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1735 }
1736 else {
// TCP: a two-byte length prefix (most significant byte first) followed by the
// packet, handed to a single writev() call.
1737 char buf[2];
1738 buf[0]=packet.size()/256;
1739 buf[1]=packet.size()%256;
1740
1741 Utility::iovec iov[2];
1742
1743 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1744 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1745
1746 int wret=Utility::writev(dc->d_socket, iov, 2);
1747 bool hadError=true;
1748
1749 if(wret == 0)
1750 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1751 else if(wret < 0 ) {
1752 int err = errno;
1753 g_log << Logger::Error << "Error writing TCP answer to " << dc->getRemote() << ": " << strerror(err) << endl;
1754 } else if((unsigned int)wret != 2 + packet.size())
1755 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1756 else
1757 hadError=false;
1758
1759 // update tcp connection status, closing if needed and doing the fd multiplexer accounting
1760 if (dc->d_tcpConnection->d_requestsInFlight > 0) {
1761 dc->d_tcpConnection->d_requestsInFlight--;
1762 }
1763
1764 // In the code below, we try to remove the fd from the set, but
1765 // we don't know if another mthread already did the remove, so we can get a
1766 // "Tried to remove unlisted fd" exception. Note that an inflight < limit test
1767 // will not work since we do not know if the other mthread got an error or not.
1768 if(hadError) {
1769 try {
1770 t_fdm->removeReadFD(dc->d_socket);
1771 }
1772 catch (FDMultiplexerException &) {
1773 }
1774 dc->d_socket = -1;
1775 }
1776 else {
1777 dc->d_tcpConnection->queriesCount++;
1778 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1779 try {
1780 t_fdm->removeReadFD(dc->d_socket);
1781 }
1782 catch (FDMultiplexerException &) {
1783 }
1784 dc->d_socket = -1;
1785 }
1786 else {
1787 Utility::gettimeofday(&g_now, 0); // needs to be updated
1788 struct timeval ttd = g_now;
1789 // If we cross from max to max-1 in flight requests, the fd was not listened to, add it back
1790 if (dc->d_tcpConnection->d_requestsInFlight == TCPConnection::s_maxInFlight - 1) {
1791 // A read error might have happened. If we add the fd back, it will most likely error again.
1792 // This is not a big issue, the next handleTCPClientReadable() will see another read error
1793 // and take action.
1794 ttd.tv_sec += g_tcpTimeout;
1795 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
1796 } else {
1797 // fd might have been removed by read error code, so expect an exception
1798 try {
1799 t_fdm->setReadTTD(dc->d_socket, ttd, g_tcpTimeout);
1800 }
1801 catch (FDMultiplexerException &) {
1802 }
1803 }
1804 }
1805 }
1806 }
// Time spent on this query, in seconds; used for the log line and the
// latency statistics below.
1807 float spent=makeFloat(sr.getNow()-dc->d_now);
1808 if(!g_quiet) {
1809 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1810 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1811 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1812 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1813
1814 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1815 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
1816 }
1817
1818 g_log<<endl;
1819
1820 }
1821
// A query that needed neither outgoing nor auth-zone queries counts as a
// cache hit.
1822 if (sr.d_outqueries || sr.d_authzonequeries) {
1823 t_RC->cacheMisses++;
1824 }
1825 else {
1826 t_RC->cacheHits++;
1827 }
1828
// Total latency histogram (thresholds are in seconds).
1829 if(spent < 0.001)
1830 g_stats.answers0_1++;
1831 else if(spent < 0.010)
1832 g_stats.answers1_10++;
1833 else if(spent < 0.1)
1834 g_stats.answers10_100++;
1835 else if(spent < 1.0)
1836 g_stats.answers100_1000++;
1837 else
1838 g_stats.answersSlow++;
1839
1840 uint64_t newLat=(uint64_t)(spent*1000000);
1841 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1842 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1843 // no worries, we do this for packet cache hits elsewhere
1844
// "Our" time: total time minus the time accounted to the network (d_totUsec).
1845 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1846 if(ourtime < 1)
1847 g_stats.ourtime0_1++;
1848 else if(ourtime < 2)
1849 g_stats.ourtime1_2++;
1850 else if(ourtime < 4)
1851 g_stats.ourtime2_4++;
1852 else if(ourtime < 8)
1853 g_stats.ourtime4_8++;
1854 else if(ourtime < 16)
1855 g_stats.ourtime8_16++;
1856 else if(ourtime < 32)
1857 g_stats.ourtime16_32++;
1858 else {
1859 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1860 g_stats.ourtimeSlow++;
1861 }
1862 if(ourtime >= 0.0) {
1863 newLat=ourtime*1000; // usec
1864 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1865 }
1866 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1867 }
// No exception leaves this function: every failure is logged and execution
// continues with the stack usage bookkeeping at the end.
1868 catch(PDNSException &ae) {
1869 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1870 }
1871 catch(const MOADNSException &mde) {
1872 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
1873 }
1874 catch(std::exception& e) {
1875 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1876
1877 // Luawrapper nests the exception from Lua, so we unnest it here
1878 try {
1879 std::rethrow_if_nested(e);
1880 } catch(const std::exception& ne) {
1881 g_log<<". Extra info: "<<ne.what();
1882 } catch(...) {}
1883
1884 g_log<<endl;
1885 }
1886 catch(...) {
1887 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1888 }
1889
// Keep track of the highest MTasker thread stack usage seen so far.
1890 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1891 }
1892
1893 static void makeControlChannelSocket(int processNum=-1)
1894 {
1895 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1896 if(processNum >= 0)
1897 sockname += "."+std::to_string(processNum);
1898 sockname+=".controlsocket";
1899 s_rcc.listen(sockname);
1900
1901 int sockowner = -1;
1902 int sockgroup = -1;
1903
1904 if (!::arg().isEmpty("socket-group"))
1905 sockgroup=::arg().asGid("socket-group");
1906 if (!::arg().isEmpty("socket-owner"))
1907 sockowner=::arg().asUid("socket-owner");
1908
1909 if (sockgroup > -1 || sockowner > -1) {
1910 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1911 unixDie("Failed to chown control socket");
1912 }
1913 }
1914
1915 // do mode change if socket-mode is given
1916 if(!::arg().isEmpty("socket-mode")) {
1917 mode_t sockmode=::arg().asMode("socket-mode");
1918 if(chmod(sockname.c_str(), sockmode) < 0) {
1919 unixDie("Failed to chmod control socket");
1920 }
1921 }
1922 }
1923
1924 static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1925 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
1926 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
1927 {
1928 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
1929 const bool lookForECS = ednssubnet != nullptr;
1930 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
1931 size_t questionLen = question.length();
1932 unsigned int consumed=0;
1933 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1934
1935 size_t pos= sizeof(dnsheader)+consumed+4;
1936 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1937 const uint16_t arcount = ntohs(dh->arcount);
1938
1939 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1940 if (question.at(pos) != 0) {
1941 /* not an OPT or a XPF, bye. */
1942 return;
1943 }
1944
1945 pos += 1;
1946 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1947 pos += sizeof(dnsrecordheader);
1948
1949 if (pos >= questionLen) {
1950 return;
1951 }
1952
1953 /* OPT root label (1) followed by type (2) */
1954 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
1955 if (!options) {
1956 char* ecsStart = nullptr;
1957 size_t ecsLen = 0;
1958 /* we need to pass the record len */
1959 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1960 if (res == 0 && ecsLen > 4) {
1961 EDNSSubnetOpts eso;
1962 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1963 *ednssubnet=eso;
1964 foundECS = true;
1965 }
1966 }
1967 }
1968 else {
1969 /* we need to pass the record len */
1970 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
1971 if (res == 0) {
1972 const auto& it = options->find(EDNSOptionCode::ECS);
1973 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
1974 EDNSSubnetOpts eso;
1975 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
1976 *ednssubnet=eso;
1977 foundECS = true;
1978 }
1979 }
1980 }
1981 }
1982 }
1983 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
1984 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1985 return;
1986 }
1987
1988 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1989 }
1990
1991 pos += ntohs(drh->d_clen);
1992 }
1993 }
1994
1995 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1996 {
1997 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1998
1999 if(conn->state==TCPConnection::BYTE0) {
2000 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
2001 if(bytes==1)
2002 conn->state=TCPConnection::BYTE1;
2003 if(bytes==2) {
2004 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2005 conn->data.resize(conn->qlen);
2006 conn->bytesread=0;
2007 conn->state=TCPConnection::GETQUESTION;
2008 }
2009 if(!bytes || bytes < 0) {
2010 t_fdm->removeReadFD(fd);
2011 return;
2012 }
2013 }
2014 else if(conn->state==TCPConnection::BYTE1) {
2015 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
2016 if(bytes==1) {
2017 conn->state=TCPConnection::GETQUESTION;
2018 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2019 conn->data.resize(conn->qlen);
2020 conn->bytesread=0;
2021 }
2022 if(!bytes || bytes < 0) {
2023 if(g_logCommonErrors)
2024 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
2025 t_fdm->removeReadFD(fd);
2026 return;
2027 }
2028 }
2029 else if(conn->state==TCPConnection::GETQUESTION) {
2030 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
2031 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
2032 if(g_logCommonErrors) {
2033 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
2034 }
2035 t_fdm->removeReadFD(fd);
2036 return;
2037 }
2038 conn->bytesread+=(uint16_t)bytes;
2039 if(conn->bytesread==conn->qlen) {
2040 conn->state = TCPConnection::BYTE0;
2041 std::unique_ptr<DNSComboWriter> dc;
2042 try {
2043 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
2044 }
2045 catch(const MOADNSException &mde) {
2046 g_stats.clientParseError++;
2047 if(g_logCommonErrors)
2048 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
2049 return;
2050 }
2051 dc->d_tcpConnection = conn; // carry the torch
2052 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
2053 dc->d_tcp=true;
2054 dc->setRemote(conn->d_remote);
2055 dc->setSource(conn->d_remote);
2056 ComboAddress dest;
2057 dest.reset();
2058 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
2059 socklen_t len = dest.getSocklen();
2060 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
2061 dc->setLocal(dest);
2062 dc->setDestination(dest);
2063 DNSName qname;
2064 uint16_t qtype=0;
2065 uint16_t qclass=0;
2066 bool needECS = false;
2067 bool needXPF = g_XPFAcl.match(conn->d_remote);
2068 string requestorId;
2069 string deviceId;
2070 string deviceName;
2071 bool logQuery = false;
2072 #ifdef HAVE_PROTOBUF
2073 auto luaconfsLocal = g_luaconfs.getLocal();
2074 if (checkProtobufExport(luaconfsLocal)) {
2075 needECS = true;
2076 }
2077 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2078 dc->d_logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2079 #endif /* HAVE_PROTOBUF */
2080
2081 #ifdef HAVE_FSTRM
2082 checkFrameStreamExport(luaconfsLocal);
2083 #endif
2084
2085 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
2086
2087 try {
2088 EDNSOptionViewMap ednsOptions;
2089 bool xpfFound = false;
2090 dc->d_ecsParsed = true;
2091 dc->d_ecsFound = false;
2092 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
2093 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2094 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
2095
2096 if(t_pdl) {
2097 try {
2098 if (t_pdl->d_gettag_ffi) {
2099 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_records, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName, dc->d_rcode, dc->d_ttlCap, dc->d_variable, logQuery, dc->d_logResponse, dc->d_followCNAMERecords);
2100 }
2101 else if (t_pdl->d_gettag) {
2102 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName);
2103 }
2104 }
2105 catch(const std::exception& e) {
2106 if(g_logCommonErrors)
2107 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2108 }
2109 }
2110 }
2111 catch(const std::exception& e)
2112 {
2113 if(g_logCommonErrors)
2114 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2115 }
2116 }
2117
2118 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2119
2120 #ifdef HAVE_PROTOBUF
2121 if(t_protobufServers || t_outgoingProtobufServers) {
2122 dc->d_requestorId = requestorId;
2123 dc->d_deviceId = deviceId;
2124 dc->d_deviceName = deviceName;
2125 dc->d_uuid = getUniqueID();
2126 }
2127
2128 if(t_protobufServers) {
2129 try {
2130
2131 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
2132 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId, dc->d_deviceName);
2133 }
2134 }
2135 catch(std::exception& e) {
2136 if(g_logCommonErrors)
2137 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
2138 }
2139 }
2140 #endif
2141 if(t_pdl) {
2142 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2143 if(!g_quiet)
2144 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2145 g_stats.policyDrops++;
2146 return;
2147 }
2148 }
2149
2150 if(dc->d_mdp.d_header.qr) {
2151 g_stats.ignoredCount++;
2152 if(g_logCommonErrors) {
2153 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2154 }
2155 return;
2156 }
2157 if(dc->d_mdp.d_header.opcode) {
2158 g_stats.ignoredCount++;
2159 if(g_logCommonErrors) {
2160 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2161 }
2162 return;
2163 }
2164 else if (dh->qdcount == 0) {
2165 g_stats.emptyQueriesCount++;
2166 if(g_logCommonErrors) {
2167 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2168 }
2169 return;
2170 }
2171 else {
2172 ++g_stats.qcounter;
2173 ++g_stats.tcpqcounter;
2174 ++conn->d_requestsInFlight;
2175 if (conn->d_requestsInFlight >= TCPConnection::s_maxInFlight) {
2176 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
2177 } else {
2178 Utility::gettimeofday(&g_now, 0); // needed?
2179 struct timeval ttd = g_now;
2180 t_fdm->setReadTTD(fd, ttd, g_tcpTimeout);
2181 }
2182 MT->makeThread(startDoResolve, dc.release()); // deletes dc
2183 return;
2184 }
2185 }
2186 }
2187 }
2188
2189 //! Handle new incoming TCP connection
2190 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
2191 {
2192 ComboAddress addr;
2193 socklen_t addrlen=sizeof(addr);
2194 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
2195 if(newsock>=0) {
2196 if(MT->numProcesses() > g_maxMThreads) {
2197 g_stats.overCapacityDrops++;
2198 try {
2199 closesocket(newsock);
2200 }
2201 catch(const PDNSException& e) {
2202 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
2203 }
2204 return;
2205 }
2206
2207 if(t_remotes)
2208 t_remotes->push_back(addr);
2209 if(t_allowFrom && !t_allowFrom->match(&addr)) {
2210 if(!g_quiet)
2211 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2212
2213 g_stats.unauthorizedTCP++;
2214 try {
2215 closesocket(newsock);
2216 }
2217 catch(const PDNSException& e) {
2218 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
2219 }
2220 return;
2221 }
2222 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
2223 g_stats.tcpClientOverflow++;
2224 try {
2225 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2226 }
2227 catch(const PDNSException& e) {
2228 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
2229 }
2230 return;
2231 }
2232
2233 setNonBlocking(newsock);
2234 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
2235 tc->state=TCPConnection::BYTE0;
2236
2237 struct timeval ttd;
2238 Utility::gettimeofday(&ttd, 0);
2239 ttd.tv_sec += g_tcpTimeout;
2240
2241 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
2242 }
2243 }
2244
2245 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
2246 {
2247 gettimeofday(&g_now, 0);
2248 if (tv.tv_sec) {
2249 struct timeval diff = g_now - tv;
2250 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
2251
2252 if(delta > 1000.0) {
2253 g_stats.tooOldDrops++;
2254 return nullptr;
2255 }
2256 }
2257
2258 ++g_stats.qcounter;
2259 if(fromaddr.sin4.sin_family==AF_INET6)
2260 g_stats.ipv6qcounter++;
2261
2262 string response;
2263 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
2264 unsigned int ctag=0;
2265 uint32_t qhash = 0;
2266 bool needECS = false;
2267 bool needXPF = g_XPFAcl.match(fromaddr);
2268 std::vector<std::string> policyTags;
2269 LuaContext::LuaObject data;
2270 ComboAddress source = fromaddr;
2271 ComboAddress destination = destaddr;
2272 string requestorId;
2273 string deviceId;
2274 string deviceName;
2275 bool logQuery = false;
2276 bool logResponse = false;
2277 #ifdef HAVE_PROTOBUF
2278 boost::uuids::uuid uniqueId;
2279 auto luaconfsLocal = g_luaconfs.getLocal();
2280 if (checkProtobufExport(luaconfsLocal)) {
2281 uniqueId = getUniqueID();
2282 needECS = true;
2283 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
2284 uniqueId = getUniqueID();
2285 }
2286 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2287 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
2288 #endif
2289 #ifdef HAVE_FSTRM
2290 checkFrameStreamExport(luaconfsLocal);
2291 #endif
2292 EDNSSubnetOpts ednssubnet;
2293 bool ecsFound = false;
2294 bool ecsParsed = false;
2295 uint16_t ecsBegin = 0;
2296 uint16_t ecsEnd = 0;
2297 std::vector<DNSRecord> records;
2298 boost::optional<int> rcode = boost::none;
2299 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2300 bool variable = false;
2301 bool followCNAMEs = false;
2302 try {
2303 DNSName qname;
2304 uint16_t qtype=0;
2305 uint16_t qclass=0;
2306 uint32_t age;
2307 bool qnameParsed=false;
2308 #ifdef MALLOC_TRACE
2309 /*
2310 static uint64_t last=0;
2311 if(!last)
2312 g_mtracer->clearAllocators();
2313 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2314 last=g_mtracer->getAllocs();
2315 cout<<g_mtracer->topAllocatorsString()<<endl;
2316 g_mtracer->clearAllocators();
2317 */
2318 #endif
2319
2320 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
2321 try {
2322 EDNSOptionViewMap ednsOptions;
2323 bool xpfFound = false;
2324
2325 ecsFound = false;
2326
2327 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2328 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2329 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2330
2331 qnameParsed = true;
2332 ecsParsed = true;
2333
2334 if(t_pdl) {
2335 try {
2336 if (t_pdl->d_gettag_ffi) {
2337 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, records, data, ednsOptions, false, requestorId, deviceId, deviceName, rcode, ttlCap, variable, logQuery, logResponse, followCNAMEs);
2338 }
2339 else if (t_pdl->d_gettag) {
2340 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName);
2341 }
2342 }
2343 catch(const std::exception& e) {
2344 if(g_logCommonErrors)
2345 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
2346 }
2347 }
2348 }
2349 catch(const std::exception& e)
2350 {
2351 if(g_logCommonErrors)
2352 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
2353 }
2354 }
2355
2356 bool cacheHit = false;
2357 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
2358 #ifdef HAVE_PROTOBUF
2359 if (t_protobufServers) {
2360 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
2361 pbMessage->setServerIdentity(SyncRes::s_serverID);
2362 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
2363 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName);
2364 }
2365 }
2366 #endif /* HAVE_PROTOBUF */
2367
2368 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2369 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2370 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2371 vState valState;
2372 if (qnameParsed) {
2373 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2374 }
2375 else {
2376 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
2377 }
2378
2379 if (cacheHit) {
2380 if(valState == Bogus) {
2381 if(t_bogusremotes)
2382 t_bogusremotes->push_back(source);
2383 if(t_bogusqueryring)
2384 t_bogusqueryring->push_back(make_pair(qname, qtype));
2385 }
2386
2387 #ifdef HAVE_PROTOBUF
2388 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
2389 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2390 ComboAddress requestor = requestorNM.getMaskedNetwork();
2391 requestor.setPort(source.getPort());
2392 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2393 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2394 if (g_useKernelTimestamp && tv.tv_sec) {
2395 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2396 }
2397 else {
2398 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2399 }
2400 pbMessage->setRequestorId(requestorId);
2401 pbMessage->setDeviceId(deviceId);
2402 pbMessage->setDeviceName(deviceName);
2403 protobufLogResponse(*pbMessage);
2404 }
2405 #endif /* HAVE_PROTOBUF */
2406 if(!g_quiet)
2407 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
2408
2409 g_stats.packetCacheHits++;
2410 SyncRes::s_queries++;
2411 ageDNSPacket(response, age);
2412 struct msghdr msgh;
2413 struct iovec iov;
2414 cmsgbuf_aligned cbuf;
2415 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2416 msgh.msg_control=NULL;
2417
2418 if(g_fromtosockets.count(fd)) {
2419 addCMsgSrcAddr(&msgh, &cbuf, &destaddr, 0);
2420 }
2421 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors) {
2422 int err = errno;
2423 g_log << Logger::Warning << "Sending UDP reply to client " << source.toStringWithPort()
2424 << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " failed with: "
2425 << strerror(err) << endl;
2426 }
2427 if(response.length() >= sizeof(struct dnsheader)) {
2428 struct dnsheader tmpdh;
2429 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
2430 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
2431 }
2432 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
2433 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
2434 return 0;
2435 }
2436 }
2437 catch(std::exception& e) {
2438 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
2439 return 0;
2440 }
2441
2442 if(t_pdl) {
2443 if(t_pdl->ipfilter(source, destination, *dh)) {
2444 if(!g_quiet)
2445 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2446 g_stats.policyDrops++;
2447 return 0;
2448 }
2449 }
2450
2451 if(MT->numProcesses() > g_maxMThreads) {
2452 if(!g_quiet)
2453 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
2454
2455 g_stats.overCapacityDrops++;
2456 return 0;
2457 }
2458
2459 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data), std::move(records)));
2460 dc->setSocket(fd);
2461 dc->d_tag=ctag;
2462 dc->d_qhash=qhash;
2463 dc->setRemote(fromaddr);
2464 dc->setSource(source);
2465 dc->setLocal(destaddr);
2466 dc->setDestination(destination);
2467 dc->d_tcp=false;
2468 dc->d_ecsFound = ecsFound;
2469 dc->d_ecsParsed = ecsParsed;
2470 dc->d_ecsBegin = ecsBegin;
2471 dc->d_ecsEnd = ecsEnd;
2472 dc->d_ednssubnet = ednssubnet;
2473 dc->d_ttlCap = ttlCap;
2474 dc->d_variable = variable;
2475 dc->d_followCNAMERecords = followCNAMEs;
2476 dc->d_rcode = rcode;
2477 dc->d_logResponse = logResponse;
2478 #ifdef HAVE_PROTOBUF
2479 if (t_protobufServers || t_outgoingProtobufServers) {
2480 dc->d_uuid = std::move(uniqueId);
2481 }
2482 dc->d_requestorId = requestorId;
2483 dc->d_deviceId = deviceId;
2484 dc->d_deviceName = deviceName;
2485 dc->d_kernelTimestamp = tv;
2486 #endif
2487
2488 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
2489 return 0;
2490 }
2491
2492
2493 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
2494 {
2495 ssize_t len;
2496 static const size_t maxIncomingQuerySize = 512;
2497 static thread_local std::string data;
2498 ComboAddress fromaddr;
2499 struct msghdr msgh;
2500 struct iovec iov;
2501 cmsgbuf_aligned cbuf;
2502 bool firstQuery = true;
2503
2504 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2505 data.resize(maxIncomingQuerySize);
2506 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2507 fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
2508
2509 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
2510
2511 firstQuery = false;
2512
2513 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2514 g_stats.ignoredCount++;
2515 if (!g_quiet) {
2516 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2517 }
2518 return;
2519 }
2520
2521 if (msgh.msg_flags & MSG_TRUNC) {
2522 g_stats.truncatedDrops++;
2523 if (!g_quiet) {
2524 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2525 }
2526 return;
2527 }
2528
2529 if(t_remotes) {
2530 t_remotes->push_back(fromaddr);
2531 }
2532
2533 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2534 if(!g_quiet) {
2535 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2536 }
2537
2538 g_stats.unauthorizedUDP++;
2539 return;
2540 }
2541 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2542 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2543 if(!g_quiet) {
2544 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2545 }
2546
2547 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2548 return;
2549 }
2550
2551 try {
2552 data.resize(static_cast<size_t>(len));
2553 dnsheader* dh=(dnsheader*)&data[0];
2554
2555 if(dh->qr) {
2556 g_stats.ignoredCount++;
2557 if(g_logCommonErrors) {
2558 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2559 }
2560 }
2561 else if(dh->opcode) {
2562 g_stats.ignoredCount++;
2563 if(g_logCommonErrors) {
2564 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2565 }
2566 }
2567 else if (dh->qdcount == 0) {
2568 g_stats.emptyQueriesCount++;
2569 if(g_logCommonErrors) {
2570 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2571 }
2572 }
2573 else {
2574 struct timeval tv={0,0};
2575 HarvestTimestamp(&msgh, &tv);
2576 ComboAddress dest;
2577 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2578 auto loc = rplookup(g_listenSocketsAddresses, fd);
2579 if(HarvestDestinationAddress(&msgh, &dest)) {
2580 // but.. need to get port too
2581 if(loc) {
2582 dest.sin4.sin_port = loc->sin4.sin_port;
2583 }
2584 }
2585 else {
2586 if(loc) {
2587 dest = *loc;
2588 }
2589 else {
2590 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2591 socklen_t slen = dest.getSocklen();
2592 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2593 }
2594 }
2595
2596 if(g_weDistributeQueries) {
2597 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2598 }
2599 else {
2600 ++s_threadInfos[t_id].numberOfDistributedQueries;
2601 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
2602 }
2603 }
2604 }
2605 catch(const MOADNSException &mde) {
2606 g_stats.clientParseError++;
2607 if(g_logCommonErrors) {
2608 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2609 }
2610 }
2611 catch(const std::runtime_error& e) {
2612 g_stats.clientParseError++;
2613 if(g_logCommonErrors) {
2614 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2615 }
2616 }
2617 }
2618 else {
2619 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2620 if(firstQuery && errno == EAGAIN) {
2621 g_stats.noPacketError++;
2622 }
2623
2624 break;
2625 }
2626 }
2627 }
2628
2629 static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
2630 {
2631 int fd;
2632 vector<string>locals;
2633 stringtok(locals,::arg()["local-address"]," ,");
2634
2635 if(locals.empty())
2636 throw PDNSException("No local address specified");
2637
2638 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2639 ServiceTuple st;
2640 st.port=::arg().asNum("local-port");
2641 parseService(*i, st);
2642
2643 ComboAddress sin;
2644
2645 sin.reset();
2646 sin.sin4.sin_family = AF_INET;
2647 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2648 sin.sin6.sin6_family = AF_INET6;
2649 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2650 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
2651 }
2652
2653 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
2654 if(fd<0)
2655 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2656
2657 setCloseOnExec(fd);
2658
2659 int tmp=1;
2660 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
2661 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
2662 exit(1);
2663 }
2664 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
2665 int err = errno;
2666 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
2667 }
2668
2669 #ifdef TCP_DEFER_ACCEPT
2670 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
2671 if(i==locals.begin())
2672 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
2673 }
2674 #endif
2675
2676 if( ::arg().mustDo("non-local-bind") )
2677 Utility::setBindAny(AF_INET, fd);
2678
2679 #ifdef SO_REUSEPORT
2680 if(g_reusePort) {
2681 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2682 throw PDNSException("SO_REUSEPORT: "+stringerror());
2683 }
2684 #endif
2685
2686 if (::arg().asNum("tcp-fast-open") > 0) {
2687 #ifdef TCP_FASTOPEN
2688 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2689 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
2690 int err = errno;
2691 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(err)<<endl;
2692 }
2693 #else
2694 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
2695 #endif
2696 }
2697
2698 sin.sin4.sin_port = htons(st.port);
2699 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
2700 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
2701 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
2702
2703 setNonBlocking(fd);
2704 setSocketSendBuffer(fd, 65000);
2705 listen(fd, 128);
2706 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
2707 tcpSockets.insert(fd);
2708
2709 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2710 // - fd is not that which we know here, but returned from accept()
2711 if(sin.sin4.sin_family == AF_INET)
2712 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
2713 else
2714 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2715 }
2716 }
2717
2718 static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
2719 {
2720 int one=1;
2721 vector<string>locals;
2722 stringtok(locals,::arg()["local-address"]," ,");
2723
2724 if(locals.empty())
2725 throw PDNSException("No local address specified");
2726
2727 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2728 ServiceTuple st;
2729 st.port=::arg().asNum("local-port");
2730 parseService(*i, st);
2731
2732 ComboAddress sin;
2733
2734 sin.reset();
2735 sin.sin4.sin_family = AF_INET;
2736 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2737 sin.sin6.sin6_family = AF_INET6;
2738 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2739 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2740 }
2741
2742 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2743 if(fd < 0) {
2744 throw PDNSException("Making a UDP server socket for resolver: "+stringerror());
2745 }
2746 if (!setSocketTimestamps(fd))
2747 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2748
2749 if(IsAnyAddress(sin)) {
2750 if(sin.sin4.sin_family == AF_INET)
2751 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2752 g_fromtosockets.insert(fd);
2753 #ifdef IPV6_RECVPKTINFO
2754 if(sin.sin4.sin_family == AF_INET6)
2755 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2756 g_fromtosockets.insert(fd);
2757 #endif
2758 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2759 int err = errno;
2760 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
2761 }
2762 }
2763 if( ::arg().mustDo("non-local-bind") )
2764 Utility::setBindAny(AF_INET6, fd);
2765
2766 setCloseOnExec(fd);
2767
2768 setSocketReceiveBuffer(fd, 250000);
2769 sin.sin4.sin_port = htons(st.port);
2770
2771
2772 #ifdef SO_REUSEPORT
2773 if(g_reusePort) {
2774 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2775 throw PDNSException("SO_REUSEPORT: "+stringerror());
2776 }
2777 #endif
2778
2779 if (sin.isIPv4()) {
2780 try {
2781 setSocketIgnorePMTU(fd);
2782 }
2783 catch(const std::exception& e) {
2784 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2785 }
2786 }
2787
2788 socklen_t socklen=sin.getSocklen();
2789 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2790 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2791
2792 setNonBlocking(fd);
2793
2794 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
2795 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2796 if(sin.sin4.sin_family == AF_INET)
2797 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2798 else
2799 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2800 }
2801 }
2802
2803 static void daemonize(void)
2804 {
2805 if(fork())
2806 exit(0); // bye bye
2807
2808 setsid();
2809
2810 int i=open("/dev/null",O_RDWR); /* open stdin */
2811 if(i < 0)
2812 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2813 else {
2814 dup2(i,0); /* stdin */
2815 dup2(i,1); /* stderr */
2816 dup2(i,2); /* stderr */
2817 close(i);
2818 }
2819 }
2820
2821 static void termIntHandler(int)
2822 {
2823 doExit();
2824 }
2825
2826 static void usr1Handler(int)
2827 {
2828 statsWanted=true;
2829 }
2830
2831 static void usr2Handler(int)
2832 {
2833 g_quiet= !g_quiet;
2834 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2835 ::arg().set("quiet")=g_quiet ? "" : "no";
2836 }
2837
2838 static void doStats(void)
2839 {
2840 static time_t lastOutputTime;
2841 static uint64_t lastQueryCount;
2842
2843 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2844 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2845
2846 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2847 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2848 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2849 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2850 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2851
2852 g_log<<Logger::Notice<<"stats: throttle map: "
2853 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2854 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<", failed ns: "
2855 << broadcastAccFunction<uint64_t>(pleaseGetFailedServersSize)<<", ednsmap: "
2856 <<broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize)<<endl;
2857 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2858 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2859 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2860 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2861 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2862
2863 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2864 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2865
2866 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2867 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2868
2869 size_t idx = 0;
2870 for (const auto& threadInfo : s_threadInfos) {
2871 if(threadInfo.isWorker) {
2872 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
2873 ++idx;
2874 }
2875 }
2876
2877 time_t now = time(0);
2878 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2879 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2880 }
2881 lastOutputTime = now;
2882 lastQueryCount = SyncRes::s_queries;
2883 }
2884 else if(statsWanted)
2885 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
2886
2887 statsWanted=false;
2888 }
2889
2890 static void houseKeeping(void *)
2891 {
2892 static thread_local time_t last_rootupdate, last_secpoll, last_trustAnchorUpdate{0};
2893 static thread_local timeval last_prune;
2894 static thread_local int cleanCounter=0;
2895 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2896 auto luaconfsLocal = g_luaconfs.getLocal();
2897
2898 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2899 // Loading the Lua config file already "refreshed" the TAs
2900 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2901 }
2902
2903 try {
2904 if(s_running) {
2905 return;
2906 }
2907 s_running=true;
2908
2909 struct timeval now, past;
2910 Utility::gettimeofday(&now, nullptr);
2911 past = now;
2912 past.tv_sec -= 5;
2913 if (last_prune < past) {
2914 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2915 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2916
2917 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2918
2919 time_t limit;
2920 if(!((cleanCounter++)%40)) { // this is a full scan!
2921 limit=now.tv_sec-300;
2922 SyncRes::pruneNSSpeeds(limit);
2923 }
2924 limit = now.tv_sec - SyncRes::s_serverdownthrottletime * 10;
2925 SyncRes::pruneFailedServers(limit);
2926 limit = now.tv_sec - 2*3600;
2927 SyncRes::pruneEDNSStatuses(limit);
2928 SyncRes::pruneThrottledServers();
2929 Utility::gettimeofday(&last_prune, nullptr);
2930 }
2931
2932 if(now.tv_sec - last_rootupdate > 7200) {
2933 int res = SyncRes::getRootNS(g_now, nullptr);
2934 if (!res) {
2935 last_rootupdate=now.tv_sec;
2936 primeRootNSZones(g_dnssecmode != DNSSECMode::Off);
2937 }
2938 }
2939
2940 if(isHandlerThread()) {
2941
2942 if(now.tv_sec - last_secpoll >= 3600) {
2943 try {
2944 doSecPoll(&last_secpoll);
2945 }
2946 catch(const std::exception& e)
2947 {
2948 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2949 }
2950 catch(const PDNSException& e)
2951 {
2952 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2953 }
2954 catch(const ImmediateServFailException &e)
2955 {
2956 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2957 }
2958 catch(const PolicyHitException& e) {
2959 g_log<<Logger::Error<<"Policy hit while performing security poll"<<endl;
2960 }
2961 catch(...)
2962 {
2963 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
2964 }
2965 }
2966
2967 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2968 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2969 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2970 try {
2971 map<DNSName, dsmap_t> dsAnchors;
2972 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2973 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2974 lci.dsAnchors = dsAnchors;
2975 });
2976 }
2977 last_trustAnchorUpdate = now.tv_sec;
2978 } catch (const PDNSException &pe) {
2979 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2980 }
2981 }
2982 }
2983 s_running=false;
2984 }
2985 catch(PDNSException& ae)
2986 {
2987 s_running=false;
2988 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2989 throw;
2990 }
2991 }
2992
2993 static void makeThreadPipes()
2994 {
2995 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
2996 if (pipeBufferSize > 0) {
2997 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
2998 }
2999
3000 /* thread 0 is the handler / SNMP, we start at 1 */
3001 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
3002 auto& threadInfos = s_threadInfos.at(n);
3003
3004 int fd[2];
3005 if(pipe(fd) < 0)
3006 unixDie("Creating pipe for inter-thread communications");
3007
3008 threadInfos.pipes.readToThread = fd[0];
3009 threadInfos.pipes.writeToThread = fd[1];
3010
3011 if(pipe(fd) < 0)
3012 unixDie("Creating pipe for inter-thread communications");
3013
3014 threadInfos.pipes.readFromThread = fd[0];
3015 threadInfos.pipes.writeFromThread = fd[1];
3016
3017 if(pipe(fd) < 0)
3018 unixDie("Creating pipe for inter-thread communications");
3019
3020 threadInfos.pipes.readQueriesToThread = fd[0];
3021 threadInfos.pipes.writeQueriesToThread = fd[1];
3022
3023 if (pipeBufferSize > 0) {
3024 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
3025 int err = errno;
3026 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(err)<<endl;
3027 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
3028 if (existingSize > 0) {
3029 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
3030 }
3031 }
3032 }
3033
3034 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
3035 unixDie("Making pipe for inter-thread communications non-blocking");
3036 }
3037 }
3038 }
3039
3040 struct ThreadMSG
3041 {
3042 pipefunc_t func;
3043 bool wantAnswer;
3044 };
3045
3046 void broadcastFunction(const pipefunc_t& func)
3047 {
3048 /* This function might be called by the worker with t_id 0 during startup
3049 for the initialization of ACLs and domain maps. After that it should only
3050 be called by the handler. */
3051
3052 if (s_threadInfos.empty() && isHandlerThread()) {
3053 /* the handler and distributors will call themselves below, but
3054 during startup we get called while s_threadInfos has not been
3055 populated yet to update the ACL or domain maps, so we need to
3056 handle that case.
3057 */
3058 func();
3059 }
3060
3061 unsigned int n = 0;
3062 for (const auto& threadInfo : s_threadInfos) {
3063 if(n++ == t_id) {
3064 func(); // don't write to ourselves!
3065 continue;
3066 }
3067
3068 ThreadMSG* tmsg = new ThreadMSG();
3069 tmsg->func = func;
3070 tmsg->wantAnswer = true;
3071 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3072 delete tmsg;
3073
3074 unixDie("write to thread pipe returned wrong size or error");
3075 }
3076
3077 string* resp = nullptr;
3078 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3079 unixDie("read from thread pipe returned wrong size or error");
3080
3081 if(resp) {
3082 delete resp;
3083 resp = nullptr;
3084 }
3085 }
3086 }
3087
3088 static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
3089 {
3090 auto& targetInfo = s_threadInfos[target];
3091 if(!targetInfo.isWorker) {
3092 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
3093 exit(1);
3094 }
3095
3096 const auto& tps = targetInfo.pipes;
3097
3098 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
3099 if (written > 0) {
3100 if (static_cast<size_t>(written) != sizeof(tmsg)) {
3101 delete tmsg;
3102 unixDie("write to thread pipe returned wrong size or error");
3103 }
3104 }
3105 else {
3106 int error = errno;
3107 if (error == EAGAIN || error == EWOULDBLOCK) {
3108 return false;
3109 } else {
3110 delete tmsg;
3111 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
3112 }
3113 }
3114
3115 ++targetInfo.numberOfDistributedQueries;
3116
3117 return true;
3118 }
3119
3120 static unsigned int getWorkerLoad(size_t workerIdx)
3121 {
3122 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
3123 if (mt != nullptr) {
3124 return mt->numProcesses();
3125 }
3126 return 0;
3127 }
3128
3129 static unsigned int selectWorker(unsigned int hash)
3130 {
3131 if (s_balancingFactor == 0) {
3132 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
3133 }
3134
3135 /* we start with one, representing the query we are currently handling */
3136 double currentLoad = 1;
3137 std::vector<unsigned int> load(g_numWorkerThreads);
3138 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3139 load[idx] = getWorkerLoad(idx);
3140 currentLoad += load[idx];
3141 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3142 }
3143
3144 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3145 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3146
3147 unsigned int worker = hash % g_numWorkerThreads;
3148 /* at least one server has to be at or below the average load */
3149 if (load[worker] > targetLoad) {
3150 ++g_stats.rebalancedQueries;
3151 do {
3152 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3153 worker = (worker + 1) % g_numWorkerThreads;
3154 }
3155 while(load[worker] > targetLoad);
3156 }
3157
3158 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3159 }
3160
3161 // This function is only called by the distributor threads, when pdns-distributes-queries is set
3162 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3163 {
3164 if (!isDistributorThread()) {
3165 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3166 exit(1);
3167 }
3168
3169 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
3170 unsigned int target = selectWorker(hash);
3171
3172 ThreadMSG* tmsg = new ThreadMSG();
3173 tmsg->func = func;
3174 tmsg->wantAnswer = false;
3175
3176 if (!trySendingQueryToWorker(target, tmsg)) {
3177 /* if this function failed but did not raise an exception, it means that the pipe
3178 was full, let's try another one */
3179 unsigned int newTarget = 0;
3180 do {
3181 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3182 } while (newTarget == target);
3183
3184 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3185 g_stats.queryPipeFullDrops++;
3186 delete tmsg;
3187 }
3188 }
3189 }
3190
3191 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
3192 {
3193 ThreadMSG* tmsg = nullptr;
3194
3195 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
3196 unixDie("read from thread pipe returned wrong size or error");
3197 }
3198
3199 void *resp=0;
3200 try {
3201 resp = tmsg->func();
3202 }
3203 catch(std::exception& e) {
3204 if(g_logCommonErrors)
3205 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
3206 }
3207 catch(PDNSException& e) {
3208 if(g_logCommonErrors)
3209 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
3210 }
3211 if(tmsg->wantAnswer) {
3212 const auto& threadInfo = s_threadInfos.at(t_id);
3213 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
3214 delete tmsg;
3215 unixDie("write to thread pipe returned wrong size or error");
3216 }
3217 }
3218
3219 delete tmsg;
3220 }
3221
3222 template<class T> void *voider(const boost::function<T*()>& func)
3223 {
3224 return func();
3225 }
3226
3227 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3228 {
3229 a.insert(a.end(), b.begin(), b.end());
3230 return a;
3231 }
3232
/* Appends all the (string, number) pairs of b to a, and returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  a.insert(std::end(a), std::begin(b), std::end(b));
  return a;
}
3238
3239 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3240 {
3241 a.insert(a.end(), b.begin(), b.end());
3242 return a;
3243 }
3244
3245
3246 /*
3247 This function should only be called by the handler to gather metrics, wipe the cache,
3248 reload the Lua script (not the Lua config) or change the current trace regex,
3249 and by the SNMP thread to gather metrics. */
3250 template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3251 {
3252 if (!isHandlerThread()) {
3253 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
3254 exit(1);
3255 }
3256
3257 unsigned int n = 0;
3258 T ret=T();
3259 for (const auto& threadInfo : s_threadInfos) {
3260 if (n++ == t_id) {
3261 continue;
3262 }
3263
3264 const auto& tps = threadInfo.pipes;
3265 ThreadMSG* tmsg = new ThreadMSG();
3266 tmsg->func = boost::bind(voider<T>, func);
3267 tmsg->wantAnswer = true;
3268
3269 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3270 delete tmsg;
3271 unixDie("write to thread pipe returned wrong size or error");
3272 }
3273
3274 T* resp = nullptr;
3275 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3276 unixDie("read from thread pipe returned wrong size or error");
3277
3278 if(resp) {
3279 ret += *resp;
3280 delete resp;
3281 resp = nullptr;
3282 }
3283 }
3284 return ret;
3285 }
3286
3287 template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3288 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3289 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3290 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3291 template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3292
3293 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
3294 {
3295 try {
3296 string remote;
3297 string msg=s_rcc.recv(&remote);
3298 RecursorControlParser rcp;
3299 RecursorControlParser::func_t* command;
3300
3301 string answer=rcp.getAnswer(msg, &command);
3302
3303 // If we are inside a chroot, we need to strip
3304 if (!arg()["chroot"].empty()) {
3305 size_t len = arg()["chroot"].length();
3306 remote = remote.substr(len);
3307 }
3308
3309 s_rcc.send(answer, &remote);
3310 command();
3311 }
3312 catch(const std::exception& e) {
3313 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
3314 }
3315 catch(const PDNSException& ae) {
3316 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
3317 }
3318 }
3319
3320 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
3321 {
3322 PacketID* pident=any_cast<PacketID>(&var);
3323 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3324
3325 shared_array<char> buffer(new char[pident->inNeeded]);
3326
3327 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
3328 if(ret > 0) {
3329 pident->inMSG.append(&buffer[0], &buffer[ret]);
3330 pident->inNeeded-=(size_t)ret;
3331 if(!pident->inNeeded || pident->inIncompleteOkay) {
3332 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3333 PacketID pid=*pident;
3334 string msg=pident->inMSG;
3335
3336 t_fdm->removeReadFD(fd);
3337 MT->sendEvent(pid, &msg);
3338 }
3339 else {
3340 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3341 }
3342 }
3343 else {
3344 PacketID tmp=*pident;
3345 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
3346 string empty;
3347 MT->sendEvent(tmp, &empty); // this conveys error status
3348 }
3349 }
3350
3351 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
3352 {
3353 PacketID* pid=any_cast<PacketID>(&var);
3354 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
3355 if(ret > 0) {
3356 pid->outPos+=(ssize_t)ret;
3357 if(pid->outPos==pid->outMSG.size()) {
3358 PacketID tmp=*pid;
3359 t_fdm->removeWriteFD(fd);
3360 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3361 }
3362 }
3363 else { // error or EOF
3364 PacketID tmp(*pid);
3365 t_fdm->removeWriteFD(fd);
3366 string sent;
3367 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
3368 }
3369 }
3370
3371 // resend event to everybody chained onto it
3372 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
3373 {
3374 if(iter->key.chain.empty())
3375 return;
3376 // cerr<<"doResends called!\n";
3377 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3378 resend.fd=-1;
3379 resend.id=*i;
3380 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3381
3382 MT->sendEvent(resend, &content);
3383 g_stats.chainResends++;
3384 }
3385 }
3386
3387 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
3388 {
3389 PacketID pid=any_cast<PacketID>(var);
3390 ssize_t len;
3391 std::string packet;
3392 packet.resize(g_outgoingEDNSBufsize);
3393 ComboAddress fromaddr;
3394 socklen_t addrlen=sizeof(fromaddr);
3395
3396 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
3397
3398 if(len < (ssize_t) sizeof(dnsheader)) {
3399 if(len < 0)
3400 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
3401 else {
3402 g_stats.serverParseError++;
3403 if(g_logCommonErrors)
3404 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
3405 ": packet smaller than DNS header"<<endl;
3406 }
3407
3408 t_udpclientsocks->returnSocket(fd);
3409 string empty;
3410
3411 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3412 if(iter != MT->d_waiters.end())
3413 doResends(iter, pid, empty);
3414
3415 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
3416 return;
3417 }
3418
3419 packet.resize(len);
3420 dnsheader dh;
3421 memcpy(&dh, &packet.at(0), sizeof(dh));
3422
3423 PacketID pident;
3424 pident.remote=fromaddr;
3425 pident.id=dh.id;
3426 pident.fd=fd;
3427
3428 if(!dh.qr && g_logCommonErrors) {
3429 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
3430 }
3431
3432 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3433 !dh.qr) { // one weird server
3434 pident.domain.clear();
3435 pident.type = 0;
3436 }
3437 else {
3438 try {
3439 if(len > 12)
3440 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
3441 }
3442 catch(std::exception& e) {
3443 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
3444 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
3445 return;
3446 }
3447 }
3448
3449 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3450 if(iter != MT->d_waiters.end()) {
3451 doResends(iter, pident, packet);
3452 }
3453
3454 retryWithName:
3455
3456 if(!MT->sendEvent(pident, &packet)) {
3457 /* we did not find a match for this response, something is wrong */
3458
3459 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3460 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3461 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
3462 pident.domain == mthread->key.domain) {
3463 mthread->key.nearMisses++;
3464 }
3465
3466 // be a bit paranoid here since we're weakening our matching
3467 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
3468 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3469 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3470 pident.domain = mthread->key.domain;
3471 pident.type = mthread->key.type;
3472 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
3473 }
3474 }
3475 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3476 if(g_logCommonErrors) {
3477 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
3478 }
3479 }
3480 else if(fd >= 0) {
3481 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
3482 t_udpclientsocks->returnSocket(fd);
3483 }
3484 }
3485
3486 FDMultiplexer* getMultiplexer()
3487 {
3488 FDMultiplexer* ret;
3489 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
3490 try {
3491 ret=i.second();
3492 return ret;
3493 }
3494 catch(FDMultiplexerException &fe) {
3495 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
3496 }
3497 catch(...) {
3498 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
3499 }
3500 }
3501 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
3502 exit(1);
3503 }
3504
3505
3506 static string* doReloadLuaScript()
3507 {
3508 string fname= ::arg()["lua-dns-script"];
3509 try {
3510 if(fname.empty()) {
3511 t_pdl.reset();
3512 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
3513 return new string("unloaded\n");
3514 }
3515 else {
3516 t_pdl = std::make_shared<RecursorLua4>();
3517 t_pdl->loadFile(fname);
3518 }
3519 }
3520 catch(std::exception& e) {
3521 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
3522 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
3523 }
3524
3525 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
3526 return new string("(re)loaded '"+fname+"'\n");
3527 }
3528
3529 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3530 {
3531 if(begin != end)
3532 ::arg().set("lua-dns-script") = *begin;
3533
3534 return broadcastAccFunction<string>(doReloadLuaScript);
3535 }
3536
3537 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
3538 try
3539 {
3540 if(newRegex.empty()) {
3541 t_traceRegex.reset();
3542 return new string("unset\n");
3543 }
3544 else {
3545 t_traceRegex = std::make_shared<Regex>(newRegex);
3546 return new string("ok\n");
3547 }
3548 }
3549 catch(PDNSException& ae)
3550 {
3551 return new string(ae.reason+"\n");
3552 }
3553
3554 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3555 {
3556 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3557 }
3558
3559 static void checkLinuxIPv6Limits()
3560 {
3561 #ifdef __linux__
3562 string line;
3563 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
3564 int lim=std::stoi(line);
3565 if(lim < 16384) {
3566 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
3567 }
3568 }
3569 #endif
3570 }
3571 static void checkOrFixFDS()
3572 {
3573 unsigned int availFDs=getFilenumLimit();
3574 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3575
3576 if(wantFDs > availFDs) {
3577 unsigned int hardlimit= getFilenumLimit(true);
3578 if(hardlimit >= wantFDs) {
3579 setFilenumLimit(wantFDs);
3580 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
3581 }
3582 else {
3583 int newval = (hardlimit - 25) / g_numWorkerThreads;
3584 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
3585 g_maxMThreads = newval;
3586 setFilenumLimit(hardlimit);
3587 }
3588 }
3589 }
3590
3591 static void* recursorThread(unsigned int tid, const string& threadName);
3592
3593 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
3594 {
3595 t_allowFrom = ng;
3596 return nullptr;
3597 }
3598
3599 int g_argc;
3600 char** g_argv;
3601
3602 void parseACLs()
3603 {
3604 static bool l_initialized;
3605
3606 if(l_initialized) { // only reload configuration file on second call
3607 string configname=::arg()["config-dir"]+"/recursor.conf";
3608 if(::arg()["config-name"]!="") {
3609 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3610 }
3611 cleanSlashes(configname);
3612
3613 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
3614 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
3615 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
3616 ::arg().preParseFile(configname.c_str(), "include-dir");
3617 ::arg().preParse(g_argc, g_argv, "include-dir");
3618
3619 // then process includes
3620 std::vector<std::string> extraConfigs;
3621 ::arg().gatherIncludes(extraConfigs);
3622
3623 for(const std::string& fn : extraConfigs) {
3624 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3625 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3626 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3627 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3628 }
3629
3630 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3631 ::arg().preParse(g_argc, g_argv, "allow-from");
3632 }
3633
3634 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3635 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3636
3637 if(!::arg()["allow-from-file"].empty()) {
3638 string line;
3639 ifstream ifs(::arg()["allow-from-file"].c_str());
3640 if(!ifs) {
3641 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3642 }
3643
3644 string::size_type pos;
3645 while(getline(ifs,line)) {
3646 pos=line.find('#');
3647 if(pos!=string::npos)
3648 line.resize(pos);
3649 trim(line);
3650 if(line.empty())
3651 continue;
3652
3653 allowFrom->addMask(line);
3654 }
3655 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
3656 }
3657 else if(!::arg()["allow-from"].empty()) {
3658 vector<string> ips;
3659 stringtok(ips, ::arg()["allow-from"], ", ");
3660
3661 g_log<<Logger::Warning<<"Only allowing queries from: ";
3662 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3663 allowFrom->addMask(*i);
3664 if(i!=ips.begin())
3665 g_log<<Logger::Warning<<", ";
3666 g_log<<Logger::Warning<<*i;
3667 }
3668 g_log<<Logger::Warning<<endl;
3669 }
3670 else {
3671 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3672 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
3673 allowFrom = nullptr;
3674 }
3675
3676 g_initialAllowFrom = allowFrom;
3677 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
3678 oldAllowFrom = nullptr;
3679
3680 l_initialized = true;
3681 }
3682
3683
3684 static void setupDelegationOnly()
3685 {
3686 vector<string> parts;
3687 stringtok(parts, ::arg()["delegation-only"], ", \t");
3688 for(const auto& p : parts) {
3689 SyncRes::addDelegationOnly(DNSName(p));
3690 }
3691 }
3692
3693 static std::map<unsigned int, std::set<int> > parseCPUMap()
3694 {
3695 std::map<unsigned int, std::set<int> > result;
3696
3697 const std::string value = ::arg()["cpu-map"];
3698
3699 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
3700 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
3701 return result;
3702 }
3703
3704 std::vector<std::string> parts;
3705
3706 stringtok(parts, value, " \t");
3707
3708 for(const auto& part : parts) {
3709 if (part.find('=') == string::npos)
3710 continue;
3711
3712 try {
3713 auto headers = splitField(part, '=');
3714 trim(headers.first);
3715 trim(headers.second);
3716
3717 unsigned int threadId = pdns_stou(headers.first);
3718 std::vector<std::string> cpus;
3719
3720 stringtok(cpus, headers.second, ",");
3721
3722 for(const auto& cpu : cpus) {
3723 int cpuId = std::stoi(cpu);
3724
3725 result[threadId].insert(cpuId);
3726 }
3727 }
3728 catch(const std::exception& e) {
3729 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
3730 }
3731 }
3732
3733 return result;
3734 }
3735
3736 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3737 {
3738 const auto& cpuMapping = cpusMap.find(n);
3739 if (cpuMapping != cpusMap.cend()) {
3740 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3741 if (rc == 0) {
3742 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
3743 for (const auto cpu : cpuMapping->second) {
3744 g_log<<Logger::Info<<" "<<cpu;
3745 }
3746 g_log<<Logger::Info<<endl;
3747 }
3748 else {
3749 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
3750 for (const auto cpu : cpuMapping->second) {
3751 g_log<<Logger::Info<<" "<<cpu;
3752 }
3753 g_log<<Logger::Info<<strerror(rc)<<endl;
3754 }
3755 }
3756 }
3757
3758 #ifdef NOD_ENABLED
3759 static void setupNODThread()
3760 {
3761 if (g_nodEnabled) {
3762 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3763 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
3764 try {
3765 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3766 }
3767 catch (const PDNSException& e) {
3768 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3769 _exit(1);
3770 }
3771 if (!t_nodDBp->init()) {
3772 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3773 _exit(1);
3774 }
3775 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
3776 t.detach();
3777 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
3778 }
3779 if (g_udrEnabled) {
3780 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3781 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
3782 try {
3783 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3784 }
3785 catch (const PDNSException& e) {
3786 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3787 _exit(1);
3788 }
3789 if (!t_udrDBp->init()) {
3790 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3791 _exit(1);
3792 }
3793 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
3794 t.detach();
3795 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
3796 }
3797 }
3798
3799 void parseNODWhitelist(const std::string& wlist)
3800 {
3801 vector<string> parts;
3802 stringtok(parts, wlist, ",; ");
3803 for(const auto& a : parts) {
3804 g_nodDomainWL.add(DNSName(a));
3805 }
3806 }
3807
3808 static void setupNODGlobal()
3809 {
3810 // Setup NOD subsystem
3811 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3812 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3813 g_nodLog = ::arg().mustDo("new-domain-log");
3814 parseNODWhitelist(::arg()["new-domain-whitelist"]);
3815
3816 // Setup Unique DNS Response subsystem
3817 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3818 g_udrLog = ::arg().mustDo("unique-response-log");
3819 }
3820 #endif /* NOD_ENABLED */
3821
3822 static int serviceMain(int argc, char*argv[])
3823 {
3824 g_log.setName(s_programname);
3825 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3826 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
3827
3828 if(!::arg()["logging-facility"].empty()) {
3829 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3830 if(val >= 0)
3831 g_log.setFacility(val);
3832 else
3833 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
3834 }
3835
3836 showProductVersion();
3837
3838 g_disthashseed=dns_random(0xffffffff);
3839
3840 checkLinuxIPv6Limits();
3841 try {
3842 vector<string> addrs;
3843 if(!::arg()["query-local-address6"].empty()) {
3844 SyncRes::s_doIPv6=true;
3845 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3846
3847 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3848 for(const string& addr : addrs) {
3849 g_localQueryAddresses6.push_back(ComboAddress(addr));
3850 }
3851 }
3852 else {
3853 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
3854 }
3855 addrs.clear();
3856 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3857 for(const string& addr : addrs) {
3858 g_localQueryAddresses4.push_back(ComboAddress(addr));
3859 }
3860 }
3861 catch(std::exception& e) {
3862 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
3863 exit(99);
3864 }
3865
3866 // keep this ABOVE loadRecursorLuaConfig!
3867 if(::arg()["dnssec"]=="off")
3868 g_dnssecmode=DNSSECMode::Off;
3869 else if(::arg()["dnssec"]=="process-no-validate")
3870 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3871 else if(::arg()["dnssec"]=="process")
3872 g_dnssecmode=DNSSECMode::Process;
3873 else if(::arg()["dnssec"]=="validate")
3874 g_dnssecmode=DNSSECMode::ValidateAll;
3875 else if(::arg()["dnssec"]=="log-fail")
3876 g_dnssecmode=DNSSECMode::ValidateForLog;
3877 else {
3878 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
3879 exit(1);
3880 }
3881
3882 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3883 if (g_signatureInceptionSkew < 0) {
3884 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3885 exit(1);
3886 }
3887
3888 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
3889 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
3890
3891 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3892 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
3893
3894 luaConfigDelayedThreads delayedLuaThreads;
3895 try {
3896 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
3897 }
3898 catch (PDNSException &e) {
3899 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
3900 exit(1);
3901 }
3902
3903 parseACLs();
3904 initPublicSuffixList(::arg()["public-suffix-list-file"]);
3905
3906 if(!::arg()["dont-query"].empty()) {
3907 vector<string> ips;
3908 stringtok(ips, ::arg()["dont-query"], ", ");
3909 ips.push_back("0.0.0.0");
3910 ips.push_back("::");
3911
3912 g_log<<Logger::Warning<<"Will not send queries to: ";
3913 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
3914 SyncRes::addDontQuery(*i);
3915 if(i!=ips.begin())
3916 g_log<<Logger::Warning<<", ";
3917 g_log<<Logger::Warning<<*i;
3918 }
3919 g_log<<Logger::Warning<<endl;
3920 }
3921
3922 g_quiet=::arg().mustDo("quiet");
3923
3924 /* this needs to be done before parseACLs(), which call broadcastFunction() */
3925 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3926 if(g_weDistributeQueries) {
3927 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
3928 }
3929
3930 setupDelegationOnly();
3931 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
3932
3933 if(::arg()["trace"]=="fail") {
3934 SyncRes::setDefaultLogMode(SyncRes::Store);
3935 }
3936 else if(::arg().mustDo("trace")) {
3937 SyncRes::setDefaultLogMode(SyncRes::Log);
3938 ::arg().set("quiet")="no";
3939 g_quiet=false;
3940 g_dnssecLOG=true;
3941 }
3942 string myHostname = getHostname();
3943 if (myHostname == "UNKNOWN"){
3944 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3945 myHostname = "";
3946 }
3947
3948 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3949 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
3950
3951 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3952
3953 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
3954 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
3955 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
3956 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
3957 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3958 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3959 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
3960 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3961 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
3962 SyncRes::s_serverID=::arg()["server-id"];
3963 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3964 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3965 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3966 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3967 if(SyncRes::s_serverID.empty()) {
3968 SyncRes::s_serverID = myHostname;
3969 }
3970
3971 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3972 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3973 SyncRes::clearECSStats();
3974 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3975 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
3976 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
3977
3978 SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
3979
3980 if (SyncRes::s_qnameminimization) {
3981 // With an empty cache, a rev ipv6 query with dnssec enabled takes
3982 // almost 100 queries. Default maxqperq is 60.
3983 SyncRes::s_maxqperq = std::max(SyncRes::s_maxqperq, static_cast<unsigned int>(100));
3984 }
3985
3986 SyncRes::s_hardenNXD = SyncRes::HardenNXD::DNSSEC;
3987 string value = ::arg()["nothing-below-nxdomain"];
3988 if (value == "yes") {
3989 SyncRes::s_hardenNXD = SyncRes::HardenNXD::Yes;
3990 } else if (value == "no") {
3991 SyncRes::s_hardenNXD = SyncRes::HardenNXD::No;
3992 } else if (value != "dnssec") {
3993 g_log << Logger::Error << "Unknown nothing-below-nxdomain mode: " << value << endl;
3994 exit(1);
3995 }
3996
3997 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3998 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3999 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
4000 }
4001 else {
4002 bool found = false;
4003 for (const auto& addr : g_localQueryAddresses4) {
4004 if (!IsAnyAddress(addr)) {
4005 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
4006 found = true;
4007 break;
4008 }
4009 }
4010 if (!found) {
4011 for (const auto& addr : g_localQueryAddresses6) {
4012 if (!IsAnyAddress(addr)) {
4013 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
4014 found = true;
4015 break;
4016 }
4017 }
4018 if (!found) {
4019 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
4020 }
4021 }
4022 }
4023
4024 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
4025 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
4026 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
4027
4028 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
4029 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
4030
4031 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
4032
4033 g_initialDomainMap = parseAuthAndForwards();
4034
4035 g_latencyStatSize=::arg().asNum("latency-statistic-size");
4036
4037 g_logCommonErrors=::arg().mustDo("log-common-errors");
4038 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
4039
4040 g_anyToTcp = ::arg().mustDo("any-to-tcp");
4041 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
4042
4043 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
4044
4045 g_numDistributorThreads = ::arg().asNum("distributor-threads");
4046 g_numWorkerThreads = ::arg().asNum("threads");
4047 if (g_numWorkerThreads < 1) {
4048 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
4049 g_numWorkerThreads = 1;
4050 }
4051
4052 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
4053 g_maxMThreads = ::arg().asNum("max-mthreads");
4054
4055
4056 int64_t maxInFlight = ::arg().asNum("max-concurrent-requests-per-tcp-connection");
4057 if (maxInFlight < 1 || maxInFlight > USHRT_MAX || maxInFlight >= g_maxMThreads) {
4058 g_log<<Logger::Warning<<"Asked to run with illegal max-concurrent-requests-per-tcp-connection, setting to default (10)"<<endl;
4059 TCPConnection::s_maxInFlight = 10;
4060 } else {
4061 TCPConnection::s_maxInFlight = maxInFlight;
4062 }
4063
4064
4065 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
4066
4067 g_statisticsInterval = ::arg().asNum("statistics-interval");
4068
4069 {
4070 SuffixMatchNode dontThrottleNames;
4071 vector<string> parts;
4072 stringtok(parts, ::arg()["dont-throttle-names"], " ,");
4073 for (const auto &p : parts) {
4074 dontThrottleNames.add(DNSName(p));
4075 }
4076 g_dontThrottleNames.setState(std::move(dontThrottleNames));
4077
4078 NetmaskGroup dontThrottleNetmasks;
4079 stringtok(parts, ::arg()["dont-throttle-netmasks"], " ,");
4080 for (const auto &p : parts) {
4081 dontThrottleNetmasks.addMask(Netmask(p));
4082 }
4083 g_dontThrottleNetmasks.setState(std::move(dontThrottleNetmasks));
4084 }
4085
4086 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
4087 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
4088 s_balancingFactor = 0.0;
4089 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
4090 }
4091
4092 #ifdef SO_REUSEPORT
4093 g_reusePort = ::arg().mustDo("reuseport");
4094 #endif
4095
4096 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
4097
4098 if (g_reusePort) {
4099 if (g_weDistributeQueries) {
4100 /* first thread is the handler, then distributors */
4101 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4102 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
4103 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
4104 makeUDPServerSockets(deferredAdds);
4105 makeTCPServerSockets(deferredAdds, tcpSockets);
4106 }
4107 }
4108 else {
4109 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4110 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4111 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
4112 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
4113 makeUDPServerSockets(deferredAdds);
4114 makeTCPServerSockets(deferredAdds, tcpSockets);
4115 }
4116 }
4117 }
4118 else {
4119 std::set<int> tcpSockets;
4120 /* we don't have reuseport so we can only open one socket per
4121 listening addr:port and everyone will listen on it */
4122 makeUDPServerSockets(g_deferredAdds);
4123 makeTCPServerSockets(g_deferredAdds, tcpSockets);
4124
4125 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
4126 needs to listen to the shared sockets */
4127 if (g_weDistributeQueries) {
4128 /* first thread is the handler, then distributors */
4129 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4130 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4131 }
4132 }
4133 else {
4134 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4135 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4136 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4137 }
4138 }
4139 }
4140
4141 #ifdef NOD_ENABLED
4142 // Setup newly observed domain globals
4143 setupNODGlobal();
4144 #endif /* NOD_ENABLED */
4145
4146 int forks;
4147 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
4148 if(!fork()) // we are child
4149 break;
4150 }
4151
4152 if(::arg().mustDo("daemon")) {
4153 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
4154 g_log.toConsole(Logger::Critical);
4155 daemonize();
4156 }
4157 if(Utility::getpid() == 1) {
4158 /* We are running as pid 1, register sigterm and sigint handler
4159
4160 The Linux kernel will handle SIGTERM and SIGINT for all processes, except PID 1.
4161 It assumes that the processes running as pid 1 is an "init" like system.
4162 For years, this was a safe assumption, but containers change that: in
4163 most (all?) container implementations, the application itself is running
4164 as pid 1. This means that sending signals to those applications, will not
4165 be handled by default. Results might be "your container not responsing
4166 when asking it to stop", or "ctrl-c not working even when the app is
4167 running in the foreground inside a container".
4168
4169 So TL;DR: If we're running pid 1 (container), we should handle SIGTERM and SIGINT ourselves */
4170
4171 signal(SIGTERM,termIntHandler);
4172 signal(SIGINT,termIntHandler);
4173 }
4174
4175 signal(SIGUSR1,usr1Handler);
4176 signal(SIGUSR2,usr2Handler);
4177 signal(SIGPIPE,SIG_IGN);
4178
4179 checkOrFixFDS();
4180
4181 #ifdef HAVE_LIBSODIUM
4182 if (sodium_init() == -1) {
4183 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
4184 exit(99);
4185 }
4186 #endif
4187
4188 openssl_thread_setup();
4189 openssl_seed();
4190 /* setup rng before chroot */
4191 dns_random_init();
4192
4193 if(::arg()["server-id"].empty()) {
4194 ::arg().set("server-id") = myHostname;
4195 }
4196
4197 int newgid=0;
4198 if(!::arg()["setgid"].empty())
4199 newgid = strToGID(::arg()["setgid"]);
4200 int newuid=0;
4201 if(!::arg()["setuid"].empty())
4202 newuid = strToUID(::arg()["setuid"]);
4203
4204 Utility::dropGroupPrivs(newuid, newgid);
4205
4206 if (!::arg()["chroot"].empty()) {
4207 #ifdef HAVE_SYSTEMD
4208 char *ns;
4209 ns = getenv("NOTIFY_SOCKET");
4210 if (ns != nullptr) {
4211 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
4212 exit(1);
4213 }
4214 #endif
4215 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
4216 int err = errno;
4217 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (err)<<", exiting"<<endl;
4218 exit(1);
4219 }
4220 else
4221 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
4222 }
4223
4224 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4225 if(!s_pidfname.empty())
4226 unlink(s_pidfname.c_str()); // remove possible old pid file
4227 writePid();
4228
4229 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4230
4231 Utility::dropUserPrivs(newuid);
4232 try {
4233 /* we might still have capabilities remaining, for example if we have been started as root
4234 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4235 like CAP_NET_BIND_SERVICE.
4236 */
4237 dropCapabilities();
4238 }
4239 catch(const std::exception& e) {
4240 g_log<<Logger::Warning<<e.what()<<endl;
4241 }
4242
4243 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4244
4245 makeThreadPipes();
4246
4247 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4248 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
4249 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
4250 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
4251
4252 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4253
4254 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4255 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4256 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4257 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
4258
4259 if (::arg().mustDo("snmp-agent")) {
4260 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4261 g_snmpAgent->run();
4262 }
4263
4264 int port = ::arg().asNum("udp-source-port-min");
4265 if(port < 1024 || port > 65535){
4266 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
4267 exit(99); // this isn't going to fix itself either
4268 }
4269 s_minUdpSourcePort = port;
4270 port = ::arg().asNum("udp-source-port-max");
4271 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
4272 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
4273 exit(99); // this isn't going to fix itself either
4274 }
4275 s_maxUdpSourcePort = port;
4276 std::vector<string> parts {};
4277 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
4278 for (const auto &part : parts)
4279 {
4280 port = std::stoi(part);
4281 if(port < 1024 || port > 65535){
4282 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
4283 exit(99); // this isn't going to fix itself either
4284 }
4285 s_avoidUdpSourcePorts.insert(port);
4286 }
4287
4288 unsigned int currentThreadId = 1;
4289 const auto cpusMap = parseCPUMap();
4290
4291 if(g_numThreads == 1) {
4292 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
4293 #ifdef HAVE_SYSTEMD
4294 sd_notify(0, "READY=1");
4295 #endif
4296
4297 /* This thread handles the web server, carbon, statistics and the control channel */
4298 auto& handlerInfos = s_threadInfos.at(0);
4299 handlerInfos.isHandler = true;
4300 handlerInfos.thread = std::thread(recursorThread, 0, "main");
4301
4302 setCPUMap(cpusMap, currentThreadId, pthread_self());
4303
4304 auto& infos = s_threadInfos.at(currentThreadId);
4305 infos.isListener = true;
4306 infos.isWorker = true;
4307 recursorThread(currentThreadId++, "worker");
4308
4309 handlerInfos.thread.join();
4310 }
4311 else {
4312
4313
4314 if (g_weDistributeQueries) {
4315 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4316 auto& infos = s_threadInfos.at(currentThreadId + n);
4317 infos.isListener = true;
4318 }
4319 }
4320 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4321 auto& infos = s_threadInfos.at(currentThreadId + (g_weDistributeQueries ? g_numDistributorThreads : 0) + n);
4322 infos.isListener = !g_weDistributeQueries;
4323 infos.isWorker = true;
4324 }
4325
4326 if (g_weDistributeQueries) {
4327 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4328 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4329 auto& infos = s_threadInfos.at(currentThreadId);
4330 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
4331 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4332 }
4333 }
4334
4335 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4336
4337 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4338 auto& infos = s_threadInfos.at(currentThreadId);
4339 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
4340 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4341 }
4342
4343 #ifdef HAVE_SYSTEMD
4344 sd_notify(0, "READY=1");
4345 #endif
4346
4347 /* This thread handles the web server, carbon, statistics and the control channel */
4348 auto& infos = s_threadInfos.at(0);
4349 infos.isHandler = true;
4350 infos.thread = std::thread(recursorThread, 0, "web+stat");
4351
4352 for (auto & ti : s_threadInfos) {
4353 ti.thread.join();
4354 }
4355 }
4356
4357 #ifdef HAVE_PROTOBUF
4358 google::protobuf::ShutdownProtobufLibrary();
4359 #endif /* HAVE_PROTOBUF */
4360 return 0;
4361 }
4362
4363 static void* recursorThread(unsigned int n, const string& threadName)
4364 try
4365 {
4366 t_id=n;
4367 auto& threadInfo = s_threadInfos.at(t_id);
4368
4369 static string threadPrefix = "pdns-r/";
4370 setThreadName(threadPrefix + threadName);
4371
4372 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
4373 SyncRes::setDomainMap(g_initialDomainMap);
4374 t_allowFrom = g_initialAllowFrom;
4375 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4376 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
4377 primeHints();
4378
4379 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
4380
4381 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
4382
4383 #ifdef NOD_ENABLED
4384 if (threadInfo.isWorker)
4385 setupNODThread();
4386 #endif /* NOD_ENABLED */
4387
4388 /* the listener threads handle TCP queries */
4389 if(threadInfo.isWorker || threadInfo.isListener) {
4390 try {
4391 if(!::arg()["lua-dns-script"].empty()) {
4392 t_pdl = std::make_shared<RecursorLua4>();
4393 t_pdl->loadFile(::arg()["lua-dns-script"]);
4394 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4395 }
4396 }
4397 catch(std::exception &e) {
4398 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4399 _exit(99);
4400 }
4401 }
4402
4403 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
4404 if(ringsize) {
4405 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4406 if(g_weDistributeQueries)
4407 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
4408 else
4409 t_remotes->set_capacity(ringsize);
4410 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4411 t_servfailremotes->set_capacity(ringsize);
4412 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4413 t_bogusremotes->set_capacity(ringsize);
4414 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4415 t_largeanswerremotes->set_capacity(ringsize);
4416 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4417 t_timeouts->set_capacity(ringsize);
4418
4419 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4420 t_queryring->set_capacity(ringsize);
4421 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4422 t_servfailqueryring->set_capacity(ringsize);
4423 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4424 t_bogusqueryring->set_capacity(ringsize);
4425 }
4426
4427 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
4428 threadInfo.mt = MT.get();
4429
4430 #ifdef HAVE_PROTOBUF
4431 /* start protobuf export threads if needed */
4432 auto luaconfsLocal = g_luaconfs.getLocal();
4433 checkProtobufExport(luaconfsLocal);
4434 checkOutgoingProtobufExport(luaconfsLocal);
4435 #endif /* HAVE_PROTOBUF */
4436 #ifdef HAVE_FSTRM
4437 checkFrameStreamExport(luaconfsLocal);
4438 #endif
4439
4440 PacketID pident;
4441
4442 t_fdm=getMultiplexer();
4443
4444 RecursorWebServer *rws = nullptr;
4445
4446 if(threadInfo.isHandler) {
4447 if(::arg().mustDo("webserver")) {
4448 g_log<<Logger::Warning << "Enabling web server" << endl;
4449 try {
4450 rws = new RecursorWebServer(t_fdm);
4451 }
4452 catch(PDNSException &e) {
4453 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
4454 exit(99);
4455 }
4456 }
4457 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
4458 }
4459 else {
4460
4461 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4462 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4463
4464 if (threadInfo.isListener) {
4465 if (g_reusePort) {
4466 /* then every listener has its own FDs */
4467 for(const auto deferred : threadInfo.deferredAdds) {
4468 t_fdm->addReadFD(deferred.first, deferred.second);
4469 }
4470 }
4471 else {
4472 /* otherwise all listeners are listening on the same ones */
4473 for(const auto deferred : g_deferredAdds) {
4474 t_fdm->addReadFD(deferred.first, deferred.second);
4475 }
4476 }
4477 }
4478 }
4479
4480 registerAllStats();
4481
4482 if(threadInfo.isHandler) {
4483 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4484 }
4485
4486 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4487
4488 bool listenOnTCP(true);
4489
4490 time_t last_stat = 0;
4491 time_t last_carbon=0, last_lua_maintenance=0;
4492 time_t carbonInterval=::arg().asNum("carbon-interval");
4493 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
4494 counter.store(0); // used to periodically execute certain tasks
4495
4496 while (!RecursorControlChannel::stop) {
4497 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
4498
4499 if(!(counter%500)) {
4500 MT->makeThread(houseKeeping, 0);
4501 }
4502
4503 if(!(counter%55)) {
4504 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
4505 expired_t expired=t_fdm->getTimeouts(g_now);
4506
4507 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
4508 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4509 if(g_logCommonErrors)
4510 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4511 t_fdm->removeReadFD(i->first);
4512 }
4513 }
4514
4515 counter++;
4516
4517 if(threadInfo.isHandler) {
4518 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4519 doStats();
4520 last_stat = g_now.tv_sec;
4521 }
4522
4523 Utility::gettimeofday(&g_now, 0);
4524
4525 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4526 MT->makeThread(doCarbonDump, 0);
4527 last_carbon = g_now.tv_sec;
4528 }
4529 }
4530 if (t_pdl != nullptr) {
4531 // lua-dns-script directive is present, call the maintenance callback if needed
4532 /* remember that the listener threads handle TCP queries */
4533 if (threadInfo.isWorker || threadInfo.isListener) {
4534 // Only on threads processing queries
4535 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4536 t_pdl->maintenance();
4537 last_lua_maintenance = g_now.tv_sec;
4538 }
4539 }
4540 }
4541
4542 t_fdm->run(&g_now);
4543 // 'run' updates g_now for us
4544
4545 if(threadInfo.isListener) {
4546 if(listenOnTCP) {
4547 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4548 for(const auto fd : threadInfo.tcpSockets) {
4549 t_fdm->removeReadFD(fd);
4550 }
4551 listenOnTCP=false;
4552 }
4553 }
4554 else {
4555 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4556 for(const auto fd : threadInfo.tcpSockets) {
4557 t_fdm->addReadFD(fd, handleNewTCPQuestion);
4558 }
4559 listenOnTCP=true;
4560 }
4561 }
4562 }
4563 }
4564 delete rws;
4565 delete t_fdm;
4566 return 0;
4567 }
4568 catch(PDNSException &ae) {
4569 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4570 return 0;
4571 }
4572 catch(std::exception &e) {
4573 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4574 return 0;
4575 }
4576 catch(...) {
4577 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4578 return 0;
4579 }
4580
4581
4582 int main(int argc, char **argv)
4583 {
4584 g_argc = argc;
4585 g_argv = argv;
4586 g_stats.startupTime=time(0);
4587 Utility::srandom();
4588 versionSetProduct(ProductRecursor);
4589 reportBasicTypes();
4590 reportOtherTypes();
4591
4592 int ret = EXIT_SUCCESS;
4593
4594 try {
4595 ::arg().set("stack-size","stack size per mthread")="200000";
4596 ::arg().set("soa-minimum-ttl","Don't change")="0";
4597 ::arg().set("no-shuffle","Don't change")="off";
4598 ::arg().set("local-port","port to listen on")="53";
4599 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4600 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4601 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4602 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4603 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4604 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4605 ::arg().set("daemon","Operate as a daemon")="no";
4606 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4607 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4608 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4609 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4610 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4611 ::arg().set("chroot","switch to chroot jail")="";
4612 ::arg().set("setgid","If set, change group id to this gid for more security"
4613 #ifdef HAVE_SYSTEMD
4614 #define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
4615 SYSTEMD_SETID_MSG
4616 #endif
4617 )="";
4618 ::arg().set("setuid","If set, change user id to this uid for more security"
4619 #ifdef HAVE_SYSTEMD
4620 SYSTEMD_SETID_MSG
4621 #endif
4622 )="";
4623 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4624 ::arg().set("threads", "Launch this number of threads")="2";
4625 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4626 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4627 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4628 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4629 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4630 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4631 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4632 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4633 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4634 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4635 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
4636 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4637 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4638 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4639 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4640 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4641
4642 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4643 ::arg().set("quiet","Suppress logging of questions and answers")="";
4644 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4645 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
4646 ::arg().set("socket-owner","Owner of socket")="";
4647 ::arg().set("socket-group","Group of socket")="";
4648 ::arg().set("socket-mode", "Permissions for socket")="";
4649
4650 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+"/pdns-recursor when unset and not chrooted" )="";
4651 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4652 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4653 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4654 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4655 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4656 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4657 ::arg().set("max-concurrent-requests-per-tcp-connection", "Maximum number of requests handled concurrently per TCP connection") = "10";
4658 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4659 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4660 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4661 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
4662 ::arg().set("hint-file", "If set, load root hints from this file")="";
4663 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4664 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4665 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4666 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4667 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4668 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4669 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4670 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4671 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4672 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4673 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
4674 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4675 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4676 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4677 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4678 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4679 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4680 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4681 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4682 ::arg().set("lua-config-file", "More powerful configuration options")="";
4683 ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
4684
4685 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4686 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4687 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4688 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4689 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
4690 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
4691 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4692 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
4693 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
4694 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
4695 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
4696 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4697 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
4698 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
4699 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
4700 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
4701 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
4702 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
4703 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
4704 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
4705 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
4706 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4707 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
4708 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
4709 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
4710 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
4711 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4712 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
4713 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
4714 ::arg().set("max-qperq", "Maximum outgoing queries per query")="60";
4715 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
4716 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
4717 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
4718 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
4719 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
4720
4721 ::arg().set("include-dir","Include *.conf files from this directory")="";
4722 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
4723
4724 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
4725
4726 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
4727 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
4728
4729 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4730 for (size_t idx = 0; idx < 32; idx++) {
4731 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4732 }
4733 for (size_t idx = 0; idx < 128; idx++) {
4734 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4735 }
4736 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4737 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4738 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4739 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
4740
4741 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
4742 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
4743
4744 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4745
4746 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4747
4748 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
4749 ::arg().set("xpf-rr-code","XPF option code to use")="0";
4750
4751 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
4752 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4753 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
4754 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
4755 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
4756 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
4757
4758 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="yes";
4759 ::arg().setSwitch("nothing-below-nxdomain", "When an NXDOMAIN exists in cache for a name with fewer labels than the qname, send NXDOMAIN without doing a lookup (see RFC 8020)")="dnssec";
4760 ::arg().set("max-generate-steps", "Maximum number of $GENERATE steps when loading a zone from a file")="0";
4761
4762 #ifdef NOD_ENABLED
4763 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4764 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4765 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4766 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4767 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4768 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
4769 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
4770 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4771 ::arg().set("unique-response-log", "Log unique responses")="yes";
4772 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
4773 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
4774 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
4775 #endif /* NOD_ENABLED */
4776 ::arg().setCmd("help","Provide a helpful message");
4777 ::arg().setCmd("version","Print version string");
4778 ::arg().setCmd("config","Output blank configuration");
4779 g_log.toConsole(Logger::Info);
4780 ::arg().laxParse(argc,argv); // do a lax parse
4781
4782 string configname=::arg()["config-dir"]+"/recursor.conf";
4783 if(::arg()["config-name"]!="") {
4784 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4785 s_programname+="-"+::arg()["config-name"];
4786 }
4787 cleanSlashes(configname);
4788
4789 if(!::arg().getCommands().empty()) {
4790 cerr<<"Fatal: non-option";
4791 if (::arg().getCommands().size() > 1) {
4792 cerr<<"s";
4793 }
4794 cerr<<" (";
4795 bool first = true;
4796 for (auto const c : ::arg().getCommands()) {
4797 if (!first) {
4798 cerr<<", ";
4799 }
4800 first = false;
4801 cerr<<c;
4802 }
4803 cerr<<") on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4804 exit(99);
4805 }
4806
4807 if(::arg().mustDo("config")) {
4808 cout<<::arg().configstring()<<endl;
4809 exit(0);
4810 }
4811
4812 if(!::arg().file(configname.c_str()))
4813 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
4814
4815 ::arg().parse(argc,argv);
4816
4817 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4818 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
4819 exit(EXIT_FAILURE);
4820 }
4821
4822 if (::arg()["socket-dir"].empty()) {
4823 if (::arg()["chroot"].empty())
4824 ::arg().set("socket-dir") = std::string(LOCALSTATEDIR) + "/pdns-recursor";
4825 else
4826 ::arg().set("socket-dir") = "/";
4827 }
4828
4829 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
4830
4831 if(::arg().asNum("threads")==1) {
4832 if (::arg().mustDo("pdns-distributes-queries")) {
4833 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4834 ::arg().set("pdns-distributes-queries")="no";
4835 }
4836 }
4837
4838 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4839 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4840 ::arg().set("distributor-threads")="1";
4841 }
4842
4843 if (!::arg().mustDo("pdns-distributes-queries")) {
4844 ::arg().set("distributor-threads")="0";
4845 }
4846
4847 if(::arg().mustDo("help")) {
4848 cout<<"syntax:"<<endl<<endl;
4849 cout<<::arg().helpstring(::arg()["help"])<<endl;
4850 exit(0);
4851 }
4852 if(::arg().mustDo("version")) {
4853 showProductVersion();
4854 showBuildConfiguration();
4855 exit(0);
4856 }
4857
4858 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
4859
4860 if (logUrgency < Logger::Error)
4861 logUrgency = Logger::Error;
4862 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4863 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4864 }
4865 g_log.setLoglevel(logUrgency);
4866 g_log.toConsole(logUrgency);
4867
4868 serviceMain(argc, argv);
4869 }
4870 catch(PDNSException &ae) {
4871 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
4872 ret=EXIT_FAILURE;
4873 }
4874 catch(std::exception &e) {
4875 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
4876 ret=EXIT_FAILURE;
4877 }
4878 catch(...) {
4879 g_log<<Logger::Error<<"any other exception in main: "<<endl;
4880 ret=EXIT_FAILURE;
4881 }
4882
4883 return ret;
4884 }