]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Merge pull request #6923 from Habbie/ds-in-unsigned-zone
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
49a699c4 33#include <pthread.h>
3ea54bf0 34#include "recpacketcache.hh"
3ddb9247 35#include "utility.hh"
51e2144e 36#include "dns_random.hh"
d1b28475
KM
37#ifdef HAVE_LIBSODIUM
38#include <sodium.h>
39#endif
3afde9b2 40#include "opensslsigners.hh"
288f4aa9
BH
41#include <iostream>
42#include <errno.h>
81859ba5 43#include <boost/static_assert.hpp>
288f4aa9
BH
44#include <map>
45#include <set>
97bb160b 46#include "recursor_cache.hh"
38c9ceaa 47#include "cachecleaner.hh"
288f4aa9 48#include <stdio.h>
c75a6a9e 49#include <signal.h>
288f4aa9 50#include <stdlib.h>
bb4bdbaf 51#include "misc.hh"
288f4aa9
BH
52#include "mtasker.hh"
53#include <utility>
288f4aa9
BH
54#include "arguments.hh"
55#include "syncres.hh"
88def049
BH
56#include <fcntl.h>
57#include <fstream>
3e61e7f7 58#include "sortlist.hh"
5c633640
BH
59#include "sstuff.hh"
60#include <boost/tuple/tuple.hpp>
61#include <boost/tuple/tuple_comparison.hpp>
72df400f 62#include <boost/shared_array.hpp>
7f1fa77d 63#include <boost/function.hpp>
5605c067 64#include <boost/algorithm/string.hpp>
8f7473d7 65#ifdef MALLOC_TRACE
66#include "malloctrace.hh"
67#endif
40a3dd64 68#include <netinet/tcp.h>
ea634573
BH
69#include "dnsparser.hh"
70#include "dnswriter.hh"
71#include "dnsrecords.hh"
f814d7c8 72#include "zoneparser-tng.hh"
1d5b3ce6 73#include "rec_channel.hh"
aaacf7f2 74#include "logger.hh"
c8ddb7c2 75#include "iputils.hh"
09e6702a 76#include "mplexer.hh"
c038218b 77#include "config.h"
808c5ef7 78#include "lua-recursor4.hh"
ba1a571d 79#include "version.hh"
79332bff 80#include "responsestats.hh"
d67620e4 81#include "secpoll-recursor.hh"
c5c066bf 82#include "dnsname.hh"
644dd1da 83#include "filterpo.hh"
84#include "rpzloader.hh"
b3f0ed10 85#include "validate-recursor.hh"
f3c18728 86#include "rec-lua-conf.hh"
5c3b5e7f 87#include "ednsoptions.hh"
85c7ca75 88#include "gettime.hh"
af1377b7
NC
89#ifdef NOD_ENABLED
90#include "nod.hh"
91#endif /* NOD_ENABLED */
f3c18728 92
d9d3f9c1 93#include "rec-protobuf.hh"
d705aad9 94#include "rec-snmp.hh"
aa7929a3 95
6b6720de
PL
96#ifdef HAVE_SYSTEMD
97#include <systemd/sd-daemon.h>
98#endif
99
d187038c
RG
100#include "namespaces.hh"
101
5cc8371b
RG
102#include "xpf.hh"
103
d187038c
RG
104typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
105
f26bf547 106static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 107static thread_local unsigned int t_id = 0;
f26bf547
RG
108static thread_local std::shared_ptr<Regex> t_traceRegex;
109static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d
RG
110#ifdef HAVE_PROTOBUF
111static thread_local std::shared_ptr<RemoteLogger> t_protobufServer{nullptr};
112static thread_local std::shared_ptr<RemoteLogger> t_outgoingProtobufServer{nullptr};
113#endif /* HAVE_PROTOBUF */
f26bf547
RG
114
115thread_local std::unique_ptr<MT_t> MT; // the big MTasker
116thread_local std::unique_ptr<MemRecursorCache> t_RC;
117thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 118thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 119thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 120thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 121thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
aa7929a3 122#ifdef HAVE_PROTOBUF
f26bf547 123thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
aa7929a3 124#endif
af1377b7
NC
125#ifdef NOD_ENABLED
126thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
127#endif /* NOD_ENABLED */
d187038c 128__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 129
b243ca3b
RG
130typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
131
d7dae798 132// for communicating with our threads
b243ca3b
RG
133// effectively readonly after startup
134struct RecThreadInfo
135{
136 struct ThreadPipeSet
137 {
138 int writeToThread{-1};
139 int readToThread{-1};
140 int writeFromThread{-1};
141 int readFromThread{-1};
142 int writeQueriesToThread{-1}; // this one is non-blocking
143 int readQueriesToThread{-1};
144 };
145
adb6cd72 146 /* FD corresponding to TCP sockets this thread is listening
c47f201b 147 on.
adb6cd72
RG
148 These FDs are also in deferredAdds when we have one
149 socket per listener, and in g_deferredAdds instead. */
150 std::set<int> tcpSockets;
b243ca3b
RG
151 /* FD corresponding to listening sockets if we have one socket per
152 listener (with reuseport), otherwise all listeners share the
153 same FD and g_deferredAdds is then used instead */
154 deferredAdd_t deferredAdds;
155 struct ThreadPipeSet pipes;
156 std::thread thread;
157 /* handle the web server, carbon, statistics and the control channel */
158 bool isHandler{false};
159 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
160 bool isListener{false};
161 /* process queries */
162 bool isWorker{false};
49a699c4 163};
810ff705 164
b243ca3b
RG
165/* first we have the handler thread, t_id == 0 (some other
166 helper threads like SNMP might have t_id == 0 as well)
167 then the distributor threads if any
168 and finally the workers */
169static std::vector<RecThreadInfo> s_threadInfos;
170/* without reuseport, all listeners share the same sockets */
171static deferredAdd_t g_deferredAdds;
faf580f5 172
d187038c
RG
173typedef vector<int> tcpListenSockets_t;
174typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 175
d187038c 176static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 177static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
178static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
179static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
180static AtomicCounter counter;
9065eb05 181static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 182static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 183static NetmaskGroup g_XPFAcl;
d187038c 184static size_t g_tcpMaxQueriesPerConn;
a5886e6a 185static size_t s_maxUDPQueriesPerRound;
d187038c
RG
186static uint64_t g_latencyStatSize;
187static uint32_t g_disthashseed;
188static unsigned int g_maxTCPPerClient;
d187038c 189static unsigned int g_maxMThreads;
b243ca3b 190static unsigned int g_numDistributorThreads;
d187038c
RG
191static unsigned int g_numWorkerThreads;
192static int g_tcpTimeout;
193static uint16_t g_udpTruncationThreshold;
59cb4a79 194static uint16_t g_xpfRRCode{0};
d187038c
RG
195static std::atomic<bool> statsWanted;
196static std::atomic<bool> g_quiet;
197static bool g_logCommonErrors;
198static bool g_anyToTcp;
b243ca3b 199static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 200static bool g_reusePort{false};
00b8cadc 201static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 202static time_t g_statisticsInterval;
9065eb05 203static bool g_useIncomingECS;
a6f7f5fe 204std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
205#ifdef NOD_ENABLED
206static bool g_nodEnabled;
207static DNSName g_nodLookupDomain;
208static bool g_nodLog;
209static SuffixMatchNode g_nodDomainWL;
210#endif /* NOD_ENABLED */
f097141b 211#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 212static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
213#else
214static std::set<uint16_t> s_avoidUdpSourcePorts;
215#endif
bf6f28ca
CHB
216static uint16_t s_minUdpSourcePort;
217static uint16_t s_maxUdpSourcePort;
49a699c4 218
b243ca3b 219RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 220RecursorStats g_stats;
2d733c0f 221string s_programname="pdns_recursor";
d187038c 222string s_pidfname;
c1c29961 223bool g_lowercaseOutgoing;
bf19ccfd 224unsigned int g_networkTimeoutMsec;
d187038c
RG
225unsigned int g_numThreads;
226uint16_t g_outgoingEDNSBufsize;
98d36505 227bool g_logRPZChanges{false};
c3828c03 228
// Comma-separated list of local networks.
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// The same list, with every entry negated by a leading '!'.
#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
// Bad nets are taken from both
//   http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
//   http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination.
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Local networks followed by the bad networks.
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 238
d7dae798 239//! used to send information to a newborn mthread
ea634573 240struct DNSComboWriter {
78f56b38 241 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now)
2749c3fe
RG
242 {
243 }
5cc8371b 244
78f56b38 245 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
246 {
247 }
248
5cc8371b
RG
249 void setRemote(const ComboAddress& sa)
250 {
251 d_remote=sa;
252 }
253
254 void setSource(const ComboAddress& sa)
ea634573 255 {
5cc8371b 256 d_source=sa;
ea634573
BH
257 }
258
b71b60ee 259 void setLocal(const ComboAddress& sa)
260 {
261 d_local=sa;
262 }
263
5cc8371b
RG
264 void setDestination(const ComboAddress& sa)
265 {
266 d_destination=sa;
267 }
b71b60ee 268
ea634573
BH
269 void setSocket(int sock)
270 {
271 d_socket=sock;
272 }
a1754c6a
BH
273
274 string getRemote() const
275 {
5cc8371b
RG
276 if (d_source == d_remote) {
277 return d_source.toStringWithPort();
278 }
279 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
280 }
281
5cc8371b 282 MOADNSParser d_mdp;
c9e9e5e0 283 struct timeval d_now;
5cc8371b
RG
284 /* Remote client, might differ from d_source
285 in case of XPF, in which case d_source holds
286 the IP of the client and d_remote of the proxy
287 */
288 ComboAddress d_remote;
289 ComboAddress d_source;
290 /* Destination address, might differ from
291 d_destination in case of XPF, in which case
292 d_destination holds the IP of the proxy and
293 d_local holds our own. */
294 ComboAddress d_local;
295 ComboAddress d_destination;
aa7929a3
RG
296#ifdef HAVE_PROTOBUF
297 boost::uuids::uuid d_uuid;
67e31ebe 298 string d_requestorId;
590388d2 299 string d_deviceId;
aa7929a3 300#endif
5164bac3
RG
301 std::vector<std::string> d_policyTags;
302 LuaContext::LuaObject d_data;
b40562da 303 EDNSSubnetOpts d_ednssubnet;
5164bac3 304 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 305 int d_socket;
b673817a 306 unsigned int d_tag{0};
e9f63d47 307 uint32_t d_qhash{0};
70fb28d9
RG
308 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
309 bool d_variable{false};
5164bac3
RG
310 bool d_ecsFound{false};
311 bool d_ecsParsed{false};
312 bool d_tcp;
ea634573
BH
313};
314
06857845
RG
315MT_t* getMT()
316{
317 return MT ? MT.get() : nullptr;
318}
ea634573 319
288f4aa9
BH
320ArgvMap &arg()
321{
322 static ArgvMap theArg;
323 return theArg;
324}
4ef015cd 325
8fb594ba 326unsigned int getRecursorThreadId()
b4015453 327{
30da2030 328 return t_id;
b4015453 329}
09e6702a 330
30ee601a
RG
331int getMTaskerTID()
332{
333 return MT->getTid();
334}
335
b243ca3b
RG
336static bool isDistributorThread()
337{
338 if (t_id == 0) {
339 return false;
340 }
341
342 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
343}
344
345static bool isHandlerThread()
346{
347 if (t_id == 0) {
348 return true;
349 }
350
351 return s_threadInfos.at(t_id).isHandler;
352}
353
d187038c 354static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 355
50c81227 356// -1 is error, 0 is timeout, 1 is success
3ddb9247 357int asendtcp(const string& data, Socket* sock)
5c633640
BH
358{
359 PacketID pident;
360 pident.sock=sock;
361 pident.outMSG=data;
3ddb9247 362
bb4bdbaf 363 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 364 string packet;
5c633640 365
5b0ddd18 366 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 367
9170fbaf 368 if(!ret || ret==-1) { // timeout
bb4bdbaf 369 t_fdm->removeWriteFD(sock->getHandle());
5c633640 370 }
50c81227
BH
371 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
372 return -1;
373 }
9170fbaf 374 return ret;
5c633640
BH
375}
376
d187038c 377static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 378
9170fbaf 379// -1 is error, 0 is timeout, 1 is success
a683e8bd 380int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 381{
50c81227 382 data.clear();
5c633640
BH
383 PacketID pident;
384 pident.sock=sock;
385 pident.inNeeded=len;
825fa717 386 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 387 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 388
bb4bdbaf 389 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 390 if(!ret || ret==-1) { // timeout
bb4bdbaf 391 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 392 }
50c81227
BH
393 else if(data.empty()) {// error, EOF or other
394 return -1;
395 }
396
9170fbaf 397 return ret;
288f4aa9
BH
398}
399
d187038c 400static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 401{
fba1e944 402 PacketID pident=*any_cast<PacketID>(&var);
4465e941 403 char resp[512];
7c77ce63
RG
404 ComboAddress fromaddr;
405 socklen_t addrlen=sizeof(fromaddr);
406
407 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
408 if (fromaddr != pident.remote) {
e6a9dde5 409 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
410
411 }
412
4465e941 413 t_fdm->removeReadFD(fd);
414 if(ret >= 0) {
a683e8bd 415 string data(resp, (size_t) ret);
fba1e944 416 MT->sendEvent(pident, &data);
4465e941 417 }
418 else {
fba1e944 419 string empty;
420 MT->sendEvent(pident, &empty);
421 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 422 }
423}
fba1e944 424string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 425{
4465e941 426 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
427 s.setNonBlocking();
428 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
429
430 s.bind(local);
431 s.connect(dest);
4465e941 432 s.send(query);
433
434 PacketID pident;
435 pident.sock=&s;
7c77ce63 436 pident.remote=dest;
4465e941 437 pident.type=0;
fba1e944 438 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 439
440 string data;
fba1e944 441
4465e941 442 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 443
4465e941 444 if(!ret || ret==-1) { // timeout
4465e941 445 t_fdm->removeReadFD(s.getHandle());
446 }
447 else if(data.empty()) {// error, EOF or other
fba1e944 448 // we could special case this
4465e941 449 return data;
450 }
4465e941 451 return data;
452}
453
d7dae798 454//! pick a random query local address
1652a63e 455ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 456{
1652a63e 457 ComboAddress ret;
5a38281c 458 if(family==AF_INET) {
3ddb9247 459 if(g_localQueryAddresses4.empty())
1652a63e 460 ret = g_local4;
3ddb9247 461 else
1652a63e
BH
462 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
463 ret.sin4.sin_port = htons(port);
5a38281c
BH
464 }
465 else {
466 if(g_localQueryAddresses6.empty())
1652a63e
BH
467 ret = g_local6;
468 else
469 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 470
1652a63e 471 ret.sin6.sin6_port = htons(port);
5a38281c 472 }
1652a63e 473 return ret;
5a38281c 474}
4ef015cd 475
d187038c 476static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 477
d187038c 478static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
479{
480 uint32_t psize=0;
481 socklen_t len=sizeof(psize);
3ddb9247 482
d7dae798 483 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 484 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 485 return;
d7dae798
BH
486 }
487
488 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 489 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
490}
491
492
493static void setSocketReceiveBuffer(int fd, uint32_t size)
494{
495 setSocketBuffer(fd, SO_RCVBUF, size);
496}
497
498static void setSocketSendBuffer(int fd, uint32_t size)
499{
500 setSocketBuffer(fd, SO_SNDBUF, size);
501}
502
503
4ef015cd
BH
504// you can ask this class for a UDP socket to send a query from
505// this socket is not yours, don't even think about deleting it
506// but after you call 'returnSocket' on it, don't assume anything anymore
507class UDPClientSocks
508{
4ef015cd 509 unsigned int d_numsocks;
4ef015cd 510public:
e2642526 511 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
512 {
513 }
514
996c89cc 515 typedef set<int> socks_t;
4ef015cd
BH
516 socks_t d_socks;
517
2ee280cf 518 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 519 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 520 {
d8f6d49f
BH
521 *fd=makeClientSocket(toaddr.sin4.sin_family);
522 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 523 return -2;
d8f6d49f
BH
524
525 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
526 int err = errno;
41ff43f8 527 // returnSocket(*fd);
a7b68ae7
RG
528 try {
529 closesocket(*fd);
530 }
531 catch(const PDNSException& e) {
e6a9dde5 532 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
533 }
534
d8f6d49f 535 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 536 return -2;
998a4334 537 return -1;
d8f6d49f 538 }
998a4334 539
d8f6d49f 540 d_socks.insert(*fd);
998a4334 541 d_numsocks++;
d8f6d49f 542 return 0;
4ef015cd
BH
543 }
544
095c3045
BH
545 void returnSocket(int fd)
546 {
547 socks_t::iterator i=d_socks.find(fd);
34801ab1 548 if(i==d_socks.end()) {
335da0ba 549 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 550 }
bb4bdbaf 551 returnSocketLocked(i);
095c3045
BH
552 }
553
4ef015cd 554 // return a socket to the pool, or simply erase it
bb4bdbaf 555 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 556 {
600fc20b 557 if(i==d_socks.end()) {
3f81d239 558 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 559 }
80baf329 560 try {
bb4bdbaf 561 t_fdm->removeReadFD(*i);
80baf329
BH
562 }
563 catch(FDMultiplexerException& e) {
bb4bdbaf 564 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 565 }
a7b68ae7
RG
566 try {
567 closesocket(*i);
568 }
569 catch(const PDNSException& e) {
e6a9dde5 570 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 571 }
3ddb9247 572
998a4334
BH
573 d_socks.erase(i++);
574 --d_numsocks;
4ef015cd 575 }
d8f6d49f
BH
576
577 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 578 static int makeClientSocket(int family)
d8f6d49f 579 {
a683e8bd 580 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 581
d8f6d49f
BH
582 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
583 return ret;
3ddb9247
PD
584
585 if(ret<0)
335da0ba 586 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 587
7eb73ffa 588 // setCloseOnExec(ret); // we're not going to exec
5a38281c 589
d8f6d49f 590 int tries=10;
3aa91c3e 591 ComboAddress sin;
d8f6d49f 592 while(--tries) {
1652a63e 593 uint16_t port;
3ddb9247 594
d8f6d49f 595 if(tries==1) // fall back to kernel 'random'
4957a608 596 port = 0;
bf6f28ca
CHB
597 else {
598 do {
599 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
600 }
601 while (s_avoidUdpSourcePorts.count(port));
602 }
5a38281c 603
3aa91c3e 604 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 605
3ddb9247 606 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 607 break;
d8f6d49f
BH
608 }
609 if(!tries)
3aa91c3e 610 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 611
3897b9e1 612 setNonBlocking(ret);
d8f6d49f
BH
613 return ret;
614 }
49a699c4
BH
615};
616
f26bf547 617static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 618
288f4aa9 619/* these two functions are used by LWRes */
34801ab1 620// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 621int asendto(const char *data, size_t len, int flags,
3ddb9247 622 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 623{
34801ab1
BH
624
625 PacketID pident;
787e5eab
BH
626 pident.domain = domain;
627 pident.remote = toaddr;
628 pident.type = qtype;
34801ab1
BH
629
630 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
631 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
632
633 for(; chain.first != chain.second; chain.first++) {
634 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 635 /*
4665c31e
BH
636 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
637 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 638 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 639 */
34801ab1
BH
640 chain.first->key.chain.insert(id); // we can chain
641 *fd=-1; // gets used in waitEvent / sendEvent later on
642 return 1;
643 }
644 }
645
49a699c4 646 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
647 if(ret < 0)
648 return ret;
34801ab1 649
998a4334
BH
650 pident.fd=*fd;
651 pident.id=id;
3ddb9247 652
bb4bdbaf
BH
653 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
654 ret = send(*fd, data, len, 0);
655
5b0ddd18 656 int tmp = errno;
bb4bdbaf 657
7302ed0a 658 if(ret < 0)
49a699c4 659 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 660
5b0ddd18 661 errno = tmp; // this is for logging purposes only
7302ed0a 662 return ret;
288f4aa9
BH
663}
664
9170fbaf 665// -1 is error, 0 is timeout, 1 is success
f128d20d 666int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 667 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 668{
0d5f0a9f 669 static optional<unsigned int> nearMissLimit;
3ddb9247 670 if(!nearMissLimit)
0d5f0a9f
BH
671 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
672
288f4aa9 673 PacketID pident;
4ef015cd 674 pident.fd=fd;
288f4aa9 675 pident.id=id;
0d5f0a9f 676 pident.domain=domain;
787e5eab 677 pident.type = qtype;
996c89cc 678 pident.remote=fromaddr;
b636533b 679
5b0ddd18 680 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 681
9170fbaf 682 if(ret > 0) {
996c89cc 683 if(packet.empty()) // means "error"
3ddb9247 684 return -1;
998a4334 685
a683e8bd 686 *d_len=packet.size();
f128d20d 687
0d5f0a9f 688 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 689 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 690 g_stats.spoofCount++;
35ce8576
BH
691 return -1;
692 }
288f4aa9 693 }
09e6702a 694 else {
34801ab1 695 if(fd >= 0)
49a699c4 696 t_udpclientsocks->returnSocket(fd);
09e6702a 697 }
9170fbaf 698 return ret;
288f4aa9
BH
699}
700
88def049
BH
701static void writePid(void)
702{
191f2e47 703 if(!::arg().mustDo("write-pid"))
704 return;
18e7758c 705 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 706 if(of)
705f31ae 707 of<< Utility::getpid() <<endl;
88def049 708 else
e6a9dde5 709 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
710}
711
2749c3fe 712TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
713{
714 ++s_currentConnections;
cd989c87 715 (*t_tcpClientCounts)[d_remote]++;
0e408828 716}
cd989c87
BH
717
718TCPConnection::~TCPConnection()
0e408828 719{
a7b68ae7
RG
720 try {
721 if(closesocket(d_fd) < 0)
e6a9dde5 722 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
723 }
724 catch(const PDNSException& e) {
e6a9dde5 725 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
726 }
727
3ddb9247 728 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 729 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 730 --s_currentConnections;
0e408828 731}
0e9d9ce2 732
3ddb9247 733AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
734
735static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 736
92011b8f 737// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 738static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 739{
92011b8f 740 if(packetsize > 1000 && t_largeanswerremotes)
741 t_largeanswerremotes->push_back(remote);
2cc13433
BH
742 switch(res) {
743 case RCode::ServFail:
92011b8f 744 if(t_servfailremotes) {
745 t_servfailremotes->push_back(remote);
5af86fdc 746 if(query && t_servfailqueryring) // packet cache
92011b8f 747 t_servfailqueryring->push_back(make_pair(*query, qtype));
748 }
2cc13433
BH
749 g_stats.servFails++;
750 break;
751 case RCode::NXDomain:
752 g_stats.nxDomains++;
753 break;
754 case RCode::NoError:
755 g_stats.noErrors++;
756 break;
757 }
758}
759
5164bac3 760static string makeLoginfo(const DNSComboWriter* dc)
a903b39c 761try
762{
5cc8371b 763 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 764}
765catch(...)
766{
767 return "Exception making error message for exception";
768}
769
aa7929a3 770#ifdef HAVE_PROTOBUF
590388d2 771static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 772{
e1c8a4bb
RG
773 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
774 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
775 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
a94bc5d7 776 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 777 message.setRequestorId(requestorId);
590388d2 778 message.setDeviceId(deviceId);
02b47f43 779
02b47f43 780 if (!policyTags.empty()) {
d9d3f9c1 781 message.setPolicyTags(policyTags);
02b47f43 782 }
aa7929a3 783
d9d3f9c1 784// cerr <<message.toDebugString()<<endl;
aa7929a3 785 std::string str;
d9d3f9c1 786 message.serialize(str);
aa7929a3 787 logger->queueData(str);
aa7929a3
RG
788}
789
d9d3f9c1 790static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
aa7929a3 791{
d9d3f9c1 792// cerr <<message.toDebugString()<<endl;
aa7929a3 793 std::string str;
d9d3f9c1 794 message.serialize(str);
aa7929a3 795 logger->queueData(str);
aa7929a3
RG
796}
797#endif
798
53508135
PL
799/**
800 * Chases the CNAME provided by the PolicyCustom RPZ policy.
801 *
802 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
803 * @param qtype: The QType of the original query
804 * @param sr: A SyncRes
805 * @param res: An integer that will contain the RCODE of the lookup we do
806 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
807 */
d187038c 808static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
809{
810 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
811 bool oldWantsRPZ = sr.getWantsRPZ();
812 sr.setWantsRPZ(false);
53508135
PL
813 vector<DNSRecord> ans;
814 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
815 for (const auto& rec : ans) {
816 if(rec.d_place == DNSResourceRecord::ANSWER) {
817 ret.push_back(rec);
818 }
819 }
820 // Reset the RPZ state of the SyncRes
30ee601a 821 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
822 }
823}
824
70fb28d9 825static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 826{
70fb28d9 827 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
828
829 if(rec.d_type != QType::OPT) // their TTL ain't real
830 minTTL = min(minTTL, rec.d_ttl);
831
832 rec.d_content->toPacket(pw);
833 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
834 pw.rollback();
835 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
836 pw.getHeader()->tc=1;
837 pw.truncate();
838 }
839 return false;
840 }
841
842 return true;
843}
844
63341e8d
RG
845#ifdef HAVE_PROTOBUF
846static std::shared_ptr<RemoteLogger> startProtobufServer(const ProtobufExportConfig& config, uint64_t generation)
847{
848 std::shared_ptr<RemoteLogger> result = nullptr;
849 try {
850 result = std::make_shared<RemoteLogger>(config.server, config.timeout, config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
851 result->setGeneration(generation);
852 }
853 catch(const std::exception& e) {
854 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<config.server<<": "<<e.what()<<endl;
855 }
856 catch(const PDNSException& e) {
857 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<config.server<<": "<<e.reason<<endl;
858 }
859
860 return result;
861}
862
863static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
864{
865 if (!luaconfsLocal->protobufExportConfig.enabled) {
866 if (t_protobufServer != nullptr) {
867 t_protobufServer->stop();
868 t_protobufServer = nullptr;
869 }
870
871 return false;
872 }
873
874 /* if the server was not running, or if it was running according to a
875 previous configuration */
876 if (t_protobufServer == nullptr ||
877 t_protobufServer->getGeneration() < luaconfsLocal->generation) {
878
879 if (t_protobufServer) {
880 t_protobufServer->stop();
881 }
882
883 t_protobufServer = startProtobufServer(luaconfsLocal->protobufExportConfig, luaconfsLocal->generation);
884 }
885
886 return true;
887}
888
889static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
890{
891 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
892 if (t_outgoingProtobufServer != nullptr) {
893 t_outgoingProtobufServer->stop();
894 t_outgoingProtobufServer = nullptr;
895 }
896
897 return false;
898 }
899
900 /* if the server was not running, or if it was running according to a
901 previous configuration */
902 if (t_outgoingProtobufServer == nullptr ||
903 t_outgoingProtobufServer->getGeneration() < luaconfsLocal->generation) {
904
905 if (t_outgoingProtobufServer) {
906 t_outgoingProtobufServer->stop();
907 }
908
909 t_outgoingProtobufServer = startProtobufServer(luaconfsLocal->outgoingProtobufExportConfig, luaconfsLocal->generation);
910 }
911
912 return true;
913}
914#endif /* HAVE_PROTOBUF */
915
af1377b7
NC
916#ifdef NOD_ENABLED
917static void nodCheckNewDomain(const DNSName& dname)
918{
919 static const QType qt(QType::A);
920 static const uint16_t qc(QClass::IN);
921 // First check the (sub)domain isn't whitelisted for NOD purposes
922 if (!g_nodDomainWL.check(dname)) {
923 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
924 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
925 if (g_nodLog) {
926 // This should probably log to a dedicated log file
927 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
928 }
929 if (!(g_nodLookupDomain.isRoot())) {
930 // Send a DNS A query to <domain>.g_nodLookupDomain
931 DNSName qname = dname;
932 vector<DNSRecord> dummy;
933 qname += g_nodLookupDomain;
934 directResolve(qname, qt, qc, dummy);
935 }
936 }
937 }
938}
939
940static void nodAddDomain(const DNSName& dname)
941{
942 // Don't bother adding domains on the nod whitelist
943 if (!g_nodDomainWL.check(dname)) {
944 if (t_nodDBp) {
945 // This keeps the nod info up to date
946 t_nodDBp->addDomain(dname);
947 }
948 }
949}
950#endif /* NOD_ENABLED */
951
d187038c 952static void startDoResolve(void *p)
288f4aa9 953{
7b1469bb 954 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 955 try {
5af86fdc
RG
956 if (t_queryring)
957 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 958
32015748 959 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 960 EDNSOpts edo;
5164bac3 961 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 962 bool variableAnswer = dc->d_variable;
8e079f3a 963 bool haveEDNS=false;
f1db0de2
PL
964 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
965 uint8_t ednsExtRCode = 0;
8e079f3a 966 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
967 haveEDNS=true;
968 if (edo.d_version != 0) {
969 ednsExtRCode = ERCode::BADVERS;
970 }
971
32015748
RG
972 if(!dc->d_tcp) {
973 /* rfc6891 6.2.3:
974 "Values lower than 512 MUST be treated as equal to 512."
975 */
976 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
977 }
5164bac3 978 ednsOpts = edo.d_options;
8e079f3a 979 haveEDNS=true;
3af35968 980 maxanswersize -= 11; // EDNS header size
b40562da 981
1f691b94
PL
982 for (const auto& o : edo.d_options) {
983 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
984 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
985 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 986 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
987 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
988 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
989 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
990 variableAnswer = true; // Can't packetcache an answer with NSID
991 // Option Code and Option Length are both 2
992 maxanswersize -= 2 + 2 + mode_server_id.size();
993 }
b40562da
RG
994 }
995 }
10321a98 996 }
b40562da
RG
997 /* perhaps there was no EDNS or no ECS but by now we looked */
998 dc->d_ecsParsed = true;
e325f20c 999 vector<DNSRecord> ret;
ea634573 1000 vector<uint8_t> packet;
b23b8614 1001
ad42489c 1002 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1003 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1004 bool wantsRPZ(true);
1fbc6dc5 1005 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1006 bool logResponse = false;
aa7929a3 1007#ifdef HAVE_PROTOBUF
63341e8d 1008 if (checkProtobufExport(luaconfsLocal)) {
f1c7929a 1009 logResponse = t_protobufServer && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1010 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1011 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
1012 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response);
1013 pbMessage->update(dc->d_uuid, &requestor, &dc->d_destination, dc->d_tcp, dc->d_mdp.d_header.id);
1014 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1015 pbMessage->setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
d9d3f9c1
RG
1016 }
1017#endif /* HAVE_PROTOBUF */
ad42489c 1018
3ddb9247 1019 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1020
1021 pw.getHeader()->aa=0;
1022 pw.getHeader()->ra=1;
c154c8a4 1023 pw.getHeader()->qr=1;
bb4bdbaf 1024 pw.getHeader()->tc=0;
ea634573 1025 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1026 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1027 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1028
70fb28d9
RG
1029 /* This is the lowest TTL seen in the records of the response,
1030 so we can't cache it for longer than this value.
1031 If we have a TTL cap, this value can't be larger than the
1032 cap no matter what. */
1033 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1034
1035 SyncRes sr(dc->d_now);
0c43f455 1036
2e921ec6 1037 bool DNSSECOK=false;
3457a2a0 1038 if(t_pdl) {
f26bf547 1039 sr.setLuaEngine(t_pdl);
3457a2a0 1040 }
9eec8c98 1041 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1042 sr.setDoDNSSEC(true);
9eec8c98
PL
1043
1044 // Does the requestor want DNSSEC records?
d6c335ab 1045 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1046 DNSSECOK=true;
1047 g_stats.dnssecQueries++;
1048 }
1049 } else {
1050 // Ignore the client-set CD flag
1051 pw.getHeader()->cd=0;
5b9853c9 1052 }
0c43f455
RG
1053 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1054
4898a348 1055#ifdef HAVE_PROTOBUF
30ee601a 1056 sr.setInitialRequestId(dc->d_uuid);
63341e8d 1057 sr.setOutgoingProtobufServer(t_outgoingProtobufServer);
4898a348 1058#endif
0c43f455 1059
2fe3354d 1060 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1061
904d3219 1062 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1063 bool shouldNotValidate = false;
904d3219 1064
ef3b6cd7
RG
1065 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1066 int res = RCode::NoError;
1f1ca368 1067 DNSFilterEngine::Policy appliedPolicy;
39ec5d29 1068 DNSRecord spoofed;
f1c7929a 1069 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1070 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1071 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1072 dq.tag = dc->d_tag;
1073 dq.discardedPolicies = &sr.d_discardedPolicies;
1074 dq.policyTags = &dc->d_policyTags;
1075 dq.appliedPolicy = &appliedPolicy;
1076 dq.currentRecords = &ret;
1077 dq.dh = &dc->d_mdp.d_header;
05c74122 1078 dq.data = dc->d_data;
67e31ebe
RG
1079#ifdef HAVE_PROTOBUF
1080 dq.requestorId = dc->d_requestorId;
590388d2 1081 dq.deviceId = dc->d_deviceId;
67e31ebe 1082#endif
ba21fcfe 1083
6cf96227
PL
1084 if(ednsExtRCode != 0) {
1085 goto sendit;
1086 }
1087
e661a20b 1088 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1089 pw.getHeader()->tc = 1;
1090 res = 0;
1091 variableAnswer = true;
e661a20b
PD
1092 goto sendit;
1093 }
1094
f26bf547 1095 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1096 sr.setLogMode(SyncRes::Store);
1097 tracedQuery=true;
1098 }
3ddb9247 1099
8f7473d7 1100
976ec823 1101 if(!g_quiet || tracedQuery) {
e6a9dde5 1102 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1103 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1104 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1105 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1106 }
e6a9dde5 1107 g_log<<endl;
976ec823 1108 }
c75a6a9e 1109
fededf47 1110 sr.setId(MT->getTid());
67828389 1111 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1112 sr.setCacheOnly();
1113
f26bf547
RG
1114 if (t_pdl) {
1115 t_pdl->prerpz(dq, res);
0a273054
RG
1116 }
1117
db486de5 1118 // Check if the query has a policy attached to it
0a273054 1119 if (wantsRPZ) {
5cc8371b 1120 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1121 }
644dd1da 1122
54be222b 1123 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1124 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1125
30ee601a 1126 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1127 if(wantsRPZ) {
1128 switch(appliedPolicy.d_kind) {
1129 case DNSFilterEngine::PolicyKind::NoAction:
1130 break;
1131 case DNSFilterEngine::PolicyKind::Drop:
1132 g_stats.policyDrops++;
7a25883a 1133 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1134 delete dc;
1135 dc=0;
1136 return;
1137 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1138 g_stats.policyResults[appliedPolicy.d_kind]++;
1139 res=RCode::NXDomain;
1140 goto haveAnswer;
1141 case DNSFilterEngine::PolicyKind::NODATA:
1142 g_stats.policyResults[appliedPolicy.d_kind]++;
1143 res=RCode::NoError;
db486de5 1144 goto haveAnswer;
b8470add
PL
1145 case DNSFilterEngine::PolicyKind::Custom:
1146 g_stats.policyResults[appliedPolicy.d_kind]++;
1147 res=RCode::NoError;
a9e029ee 1148 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1149 ret.push_back(spoofed);
53508135 1150 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1151 goto haveAnswer;
1152 case DNSFilterEngine::PolicyKind::Truncate:
1153 if(!dc->d_tcp) {
1154 g_stats.policyResults[appliedPolicy.d_kind]++;
1155 res=RCode::NoError;
1156 pw.getHeader()->tc=1;
1157 goto haveAnswer;
1158 }
1159 break;
1160 }
db486de5
PL
1161 }
1162
b8470add 1163 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1164 try {
1165 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1166 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1167 }
1168 catch(ImmediateServFailException &e) {
854d44e3 1169 if(g_logCommonErrors)
e6a9dde5 1170 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1171 res = RCode::ServFail;
1172 }
4485aa35 1173
1921a4c2
RG
1174 dq.validationState = sr.getValidationState();
1175
b8470add
PL
1176 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1177 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1178 appliedPolicy = sr.d_appliedPolicy;
1179 g_stats.policyResults[appliedPolicy.d_kind]++;
1180 switch(appliedPolicy.d_kind) {
1181 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1182 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1183 case DNSFilterEngine::PolicyKind::Drop:
1184 g_stats.policyDrops++;
1185 delete dc;
1186 dc=0;
1187 return;
1188 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1189 ret.clear();
1190 res=RCode::NXDomain;
1191 goto haveAnswer;
1192
1193 case DNSFilterEngine::PolicyKind::NODATA:
1194 ret.clear();
1195 res=RCode::NoError;
1196 goto haveAnswer;
1197
1198 case DNSFilterEngine::PolicyKind::Truncate:
1199 if(!dc->d_tcp) {
1200 ret.clear();
1201 res=RCode::NoError;
1202 pw.getHeader()->tc=1;
1203 goto haveAnswer;
1204 }
1205 break;
1206
1207 case DNSFilterEngine::PolicyKind::Custom:
1208 ret.clear();
1209 res=RCode::NoError;
a9e029ee 1210 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1211 ret.push_back(spoofed);
53508135 1212 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1213 goto haveAnswer;
1214 }
1215 }
1216
1217 if (wantsRPZ) {
1f1ca368 1218 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1219 }
db486de5 1220
f26bf547 1221 if(t_pdl) {
db486de5
PL
1222 if(res == RCode::NoError) {
1223 auto i=ret.cbegin();
1224 for(; i!= ret.cend(); ++i)
1225 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1226 break;
f26bf547 1227 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1228 shouldNotValidate = true;
1229
db486de5 1230 }
f26bf547 1231 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1232 shouldNotValidate = true;
db486de5 1233
f26bf547 1234 if(t_pdl->postresolve(dq, res))
3ca4e735 1235 shouldNotValidate = true;
db486de5
PL
1236 }
1237
b8470add
PL
1238 if (wantsRPZ) { //XXX This block is repeated, see above
1239 g_stats.policyResults[appliedPolicy.d_kind]++;
1240 switch(appliedPolicy.d_kind) {
1241 case DNSFilterEngine::PolicyKind::NoAction:
1242 break;
1243 case DNSFilterEngine::PolicyKind::Drop:
1244 g_stats.policyDrops++;
1245 delete dc;
1246 dc=0;
1247 return;
1248 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1249 ret.clear();
1250 res=RCode::NXDomain;
1251 goto haveAnswer;
1252
1253 case DNSFilterEngine::PolicyKind::NODATA:
1254 ret.clear();
1255 res=RCode::NoError;
1256 goto haveAnswer;
1257
1258 case DNSFilterEngine::PolicyKind::Truncate:
1259 if(!dc->d_tcp) {
1260 ret.clear();
1261 res=RCode::NoError;
1262 pw.getHeader()->tc=1;
1263 goto haveAnswer;
1264 }
1265 break;
1266
1267 case DNSFilterEngine::PolicyKind::Custom:
1268 ret.clear();
1269 res=RCode::NoError;
a9e029ee 1270 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1271 ret.push_back(spoofed);
53508135 1272 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1273 goto haveAnswer;
1274 }
644dd1da 1275 }
4485aa35 1276 }
644dd1da 1277 haveAnswer:;
3e8216c8 1278 if(res == PolicyDecision::DROP) {
e9c2ad3a 1279 g_stats.policyDrops++;
ae7e77ad 1280 delete dc;
1281 dc=0;
1282 return;
3ddb9247 1283 }
9cdfab64 1284 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1285 {
85ffbc53
PD
1286 string trace(sr.getTrace());
1287 if(!trace.empty()) {
1288 vector<string> lines;
1289 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1290 for(const string& line : lines) {
85ffbc53 1291 if(!line.empty())
e6a9dde5 1292 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1293 }
1294 }
1295 }
3ddb9247 1296
9cdfab64 1297 if(res == -1) {
0fe1d080
PD
1298 pw.getHeader()->rcode=RCode::ServFail;
1299 // no commit here, because no record
1300 g_stats.servFails++;
1301 }
288f4aa9 1302 else {
ea634573 1303 pw.getHeader()->rcode=res;
92011b8f 1304
f3fe4ae6 1305 // Does the validation mode or query demand validation?
0c43f455 1306 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1307 try {
f3fe4ae6 1308 if(sr.doLog()) {
e6a9dde5 1309 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1310 }
4d2be65d
RG
1311
1312 auto state = sr.getValidationState();
1313
b25cae9a 1314 if(state == Secure) {
2e921ec6 1315 if(sr.doLog()) {
e6a9dde5 1316 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1317 }
b25cae9a 1318
1319 // Is the query source interested in the value of the ad-bit?
885c8881 1320 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1321 pw.getHeader()->ad=1;
1322 }
1323 else if(state == Insecure) {
f3fe4ae6 1324 if(sr.doLog()) {
e6a9dde5 1325 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1326 }
b25cae9a 1327
1328 pw.getHeader()->ad=0;
f3fe4ae6 1329 }
b25cae9a 1330 else if(state == Bogus) {
66f2e6ad
KM
1331 if(t_bogusremotes)
1332 t_bogusremotes->push_back(dc->d_source);
1333 if(t_bogusqueryring)
1334 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1335 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1336 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1337 }
1338
1339 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1340 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1341 if(sr.doLog()) {
e6a9dde5 1342 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1343 }
1344
1345 pw.getHeader()->rcode=RCode::ServFail;
1346 goto sendit;
1347 } else {
1348 if(sr.doLog()) {
e6a9dde5 1349 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1350 }
1351 }
1352 }
1353 }
1354 catch(ImmediateServFailException &e) {
1355 if(g_logCommonErrors)
e6a9dde5 1356 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1357 pw.getHeader()->rcode=RCode::ServFail;
1358 goto sendit;
f3fe4ae6 1359 }
b3f0ed10 1360 }
1361
c154c8a4 1362 if(ret.size()) {
92476c8b 1363 orderAndShuffle(ret);
5cc8371b 1364 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1365 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1366 variableAnswer=true;
1367 }
8e079f3a 1368 }
0afa32d4
RG
1369
1370 bool needCommit = false;
8e079f3a 1371 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1372 if( ! DNSSECOK &&
1373 ( i->d_type == QType::NSEC3 ||
1374 (
1375 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1376 (
1377 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1378 i->d_place != DNSResourceRecord::ANSWER
1379 )
1380 )
1381 )
1382 ) {
2e921ec6 1383 continue;
3e80ebce
KM
1384 }
1385
70fb28d9 1386 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1387 needCommit = false;
1388 break;
1389 }
1390 needCommit = true;
1391
aa7929a3 1392#ifdef HAVE_PROTOBUF
63341e8d 1393 if(t_protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
d362f7c1 1394 pbMessage->addRR(*i);
aa7929a3
RG
1395 }
1396#endif
ea634573 1397 }
0afa32d4 1398 if(needCommit)
8e079f3a 1399 pw.commit();
288f4aa9 1400 }
10321a98 1401 sendit:;
b3f0ed10 1402
97c6d7e5
RG
1403 if (haveEDNS) {
1404 /* we try to add the EDNS OPT RR even for truncated answers,
1405 as rfc6891 states:
1406 "The minimal response MUST be the DNS header, question section, and an
1407 OPT record. This MUST also occur when a truncated response (using
1408 the DNS header's TC bit) is returned."
1409 */
9b60fb71 1410 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1411 pw.commit();
97c6d7e5
RG
1412 }
1413
79332bff 1414 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1415 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
aa7929a3 1416#ifdef HAVE_PROTOBUF
845cbf4c 1417 if (t_protobufServer && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1418 pbMessage->setBytes(packet.size());
1419 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1420 if (appliedPolicy.d_name) {
d362f7c1
RG
1421 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1422 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1423 }
d362f7c1
RG
1424 pbMessage->setPolicyTags(dc->d_policyTags);
1425 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1426 pbMessage->setRequestorId(dq.requestorId);
1427 pbMessage->setDeviceId(dq.deviceId);
63341e8d 1428 protobufLogResponse(t_protobufServer, *pbMessage);
aa7929a3
RG
1429 }
1430#endif
ea634573 1431 if(!dc->d_tcp) {
b71b60ee 1432 struct msghdr msgh;
1433 struct iovec iov;
1434 char cbuf[256];
1435 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1436 msgh.msg_control=NULL;
1437
cbc03320 1438 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1439 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1440 }
cbc03320 1441 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1442 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1443
3762e821 1444 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
e9f63d47 1445 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1446 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1447 g_now.tv_sec,
76e2b9e3 1448 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1449 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1450 dq.validationState,
d362f7c1 1451 pbMessage);
1051f8a9 1452 }
3762e821 1453 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1454 }
9c495589
BH
1455 else {
1456 char buf[2];
ea634573
BH
1457 buf[0]=packet.size()/256;
1458 buf[1]=packet.size()%256;
feccc9fc 1459
c038218b 1460 Utility::iovec iov[2];
feccc9fc 1461
ea634573
BH
1462 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1463 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1464
dd079764 1465 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1466 bool hadError=true;
feccc9fc 1467
dd079764 1468 if(wret == 0)
e6a9dde5 1469 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1470 else if(wret < 0 )
e6a9dde5 1471 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1472 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1473 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1474 else
18af64a8 1475 hadError=false;
3ddb9247 1476
09e6702a 1477 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1478
09e6702a 1479 if(hadError) {
18af64a8 1480 // no need to remove us from FDM, we weren't there
c36bc97a 1481 dc->d_socket = -1;
09e6702a 1482 }
a6ae6414 1483 else {
fde296a3
RG
1484 dc->d_tcpConnection->queriesCount++;
1485 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1486 dc->d_socket = -1;
1487 }
1488 else {
1489 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1490 Utility::gettimeofday(&g_now, 0); // needs to be updated
1491 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1492 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1493 }
0e9d9ce2 1494 }
9c495589 1495 }
2c9119cd 1496 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1497 if(!g_quiet) {
e6a9dde5
PL
1498 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1499 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1500 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1501 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1502
1503 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1504 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1505 }
1506
e6a9dde5 1507 g_log<<endl;
2c9119cd 1508
c75a6a9e 1509 }
b23b8614 1510
f7b8cffa
RG
1511 if (sr.d_outqueries || sr.d_authzonequeries) {
1512 t_RC->cacheMisses++;
af1377b7
NC
1513#ifdef NOD_ENABLED
1514 if (g_nodEnabled) {
1515 nodCheckNewDomain(dc->d_mdp.d_qname);
1516 }
1517#endif /* NOD_ENABLED */
f7b8cffa
RG
1518 }
1519 else {
1520 t_RC->cacheHits++;
af1377b7
NC
1521#ifdef NOD_ENABLED
1522 if (g_nodEnabled) {
1523 nodAddDomain(dc->d_mdp.d_qname);
1524 }
1525#endif /* NOD_ENABLED */
f7b8cffa 1526 }
2c9119cd 1527
fe213470
BH
1528 if(spent < 0.001)
1529 g_stats.answers0_1++;
1530 else if(spent < 0.010)
1531 g_stats.answers1_10++;
1532 else if(spent < 0.1)
1533 g_stats.answers10_100++;
1534 else if(spent < 1.0)
1535 g_stats.answers100_1000++;
1536 else
1537 g_stats.answersSlow++;
1538
574af7ea 1539 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1540 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1541 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1542 // no worries, we do this for packet cache hits elsewhere
19178da9 1543
1544 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1545 if(ourtime < 1)
1546 g_stats.ourtime0_1++;
1547 else if(ourtime < 2)
1548 g_stats.ourtime1_2++;
1549 else if(ourtime < 4)
1550 g_stats.ourtime2_4++;
1551 else if(ourtime < 8)
1552 g_stats.ourtime4_8++;
1553 else if(ourtime < 16)
1554 g_stats.ourtime8_16++;
1555 else if(ourtime < 32)
1556 g_stats.ourtime16_32++;
1557 else {
1558 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1559 g_stats.ourtimeSlow++;
1560 }
042da1a1 1561 if(ourtime >= 0.0) {
1562 newLat=ourtime*1000; // usec
1563 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1564 }
c6d04bdc 1565 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 1566 delete dc;
c36bc97a 1567 dc=0;
288f4aa9 1568 }
3f81d239 1569 catch(PDNSException &ae) {
e6a9dde5 1570 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 1571 delete dc;
288f4aa9 1572 }
7b1469bb 1573 catch(MOADNSException& e) {
e6a9dde5 1574 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 1575 delete dc;
7b1469bb 1576 }
fdbf35ac 1577 catch(std::exception& e) {
e6a9dde5 1578 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1579
1580 // Luawrapper nests the exception from Lua, so we unnest it here
1581 try {
1582 std::rethrow_if_nested(e);
2010ac95 1583 } catch(const std::exception& ne) {
e6a9dde5 1584 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1585 } catch(...) {}
1586
e6a9dde5 1587 g_log<<endl;
c36bc97a 1588 delete dc;
c154c8a4 1589 }
288f4aa9 1590 catch(...) {
e6a9dde5 1591 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1592 }
3ddb9247 1593
ec6eacbc 1594 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1595}
1596
d187038c 1597static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1598{
2d733c0f 1599 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1600 if(processNum >= 0)
335da0ba 1601 sockname += "."+std::to_string(processNum);
677e2a46 1602 sockname+=".controlsocket";
41f7a068 1603 s_rcc.listen(sockname);
3ddb9247 1604
387de317
BH
1605 int sockowner = -1;
1606 int sockgroup = -1;
1607
1608 if (!::arg().isEmpty("socket-group"))
1609 sockgroup=::arg().asGid("socket-group");
1610 if (!::arg().isEmpty("socket-owner"))
1611 sockowner=::arg().asUid("socket-owner");
3ddb9247 1612
f838ad8d
BH
1613 if (sockgroup > -1 || sockowner > -1) {
1614 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1615 unixDie("Failed to chown control socket");
1616 }
1617 }
387de317
BH
1618
1619 // do mode change if socket-mode is given
1620 if(!::arg().isEmpty("socket-mode")) {
1621 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1622 if(chmod(sockname.c_str(), sockmode) < 0) {
1623 unixDie("Failed to chmod control socket");
1624 }
387de317 1625 }
1d5b3ce6
BH
1626}
1627
5cc8371b 1628static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1629 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1630 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1631{
59cb4a79 1632 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1633 const bool lookForECS = ednssubnet != nullptr;
1634 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1635 size_t questionLen = question.length();
1636 unsigned int consumed=0;
1637 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1638
1639 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1640 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1641 const uint16_t arcount = ntohs(dh->arcount);
1642
1643 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1644 if (question.at(pos) != 0) {
1645 /* not an OPT or a XPF, bye. */
1646 return;
1647 }
1648
1649 pos += 1;
1650 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1651 pos += sizeof(dnsrecordheader);
1652
1653 if (pos >= questionLen) {
1654 return;
1655 }
1656
02b47f43 1657 /* OPT root label (1) followed by type (2) */
5cc8371b 1658 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1659 if (!options) {
1660 char* ecsStart = nullptr;
1661 size_t ecsLen = 0;
5cc8371b
RG
1662 /* we need to pass the record len */
1663 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1664 if (res == 0 && ecsLen > 4) {
1665 EDNSSubnetOpts eso;
1666 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1667 *ednssubnet=eso;
5cc8371b 1668 foundECS = true;
00b8cadc
RG
1669 }
1670 }
1671 }
1672 else {
5cc8371b
RG
1673 /* we need to pass the record len */
1674 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1675 if (res == 0) {
1676 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1677 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1678 EDNSSubnetOpts eso;
29e6303a 1679 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1680 *ednssubnet=eso;
5cc8371b 1681 foundECS = true;
00b8cadc
RG
1682 }
1683 }
02b47f43
RG
1684 }
1685 }
1686 }
59cb4a79 1687 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1688 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1689 return;
1690 }
1691
1692 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1693 }
1694
1695 pos += ntohs(drh->d_clen);
02b47f43
RG
1696 }
1697}
1698
d187038c 1699static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1700{
cd989c87 1701 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1702
879b3f70 1703 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 1704 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1705 if(bytes==1)
667f7e60 1706 conn->state=TCPConnection::BYTE1;
3ddb9247 1707 if(bytes==2) {
a0aa4f64 1708 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1709 conn->data.resize(conn->qlen);
667f7e60
BH
1710 conn->bytesread=0;
1711 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1712 }
1713 if(!bytes || bytes < 0) {
bb4bdbaf 1714 t_fdm->removeReadFD(fd);
09e6702a
BH
1715 return;
1716 }
1717 }
667f7e60 1718 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 1719 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1720 if(bytes==1) {
667f7e60 1721 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1722 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1723 conn->data.resize(conn->qlen);
667f7e60 1724 conn->bytesread=0;
09e6702a
BH
1725 }
1726 if(!bytes || bytes < 0) {
1727 if(g_logCommonErrors)
e6a9dde5 1728 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1729 t_fdm->removeReadFD(fd);
09e6702a
BH
1730 return;
1731 }
1732 }
667f7e60 1733 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 1734 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
f9d67b41 1735 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1736 if(g_logCommonErrors) {
1737 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1738 }
bb4bdbaf 1739 t_fdm->removeReadFD(fd);
09e6702a
BH
1740 return;
1741 }
b841314c 1742 conn->bytesread+=(uint16_t)bytes;
667f7e60 1743 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1744 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1745
f26bf547 1746 DNSComboWriter* dc=nullptr;
09e6702a 1747 try {
2749c3fe 1748 dc=new DNSComboWriter(conn->data, g_now);
09e6702a
BH
1749 }
1750 catch(MOADNSException &mde) {
3ddb9247 1751 g_stats.clientParseError++;
4957a608 1752 if(g_logCommonErrors)
e6a9dde5 1753 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1754 return;
09e6702a 1755 }
cd989c87
BH
1756 dc->d_tcpConnection = conn; // carry the torch
1757 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1758 dc->d_tcp=true;
5cc8371b
RG
1759 dc->setRemote(conn->d_remote);
1760 dc->setSource(conn->d_remote);
a6147cd2 1761 ComboAddress dest;
d38e2ba9 1762 dest.reset();
a6147cd2 1763 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1764 socklen_t len = dest.getSocklen();
1765 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1766 dc->setLocal(dest);
5cc8371b 1767 dc->setDestination(dest);
33dcceba
RG
1768 DNSName qname;
1769 uint16_t qtype=0;
1770 uint16_t qclass=0;
1771 bool needECS = false;
5cc8371b 1772 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1773 string requestorId;
590388d2 1774 string deviceId;
16bbc6e3 1775 bool logQuery = false;
aa7929a3 1776#ifdef HAVE_PROTOBUF
02b47f43 1777 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1778 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1779 needECS = true;
1780 }
16bbc6e3 1781 logQuery = t_protobufServer && luaconfsLocal->protobufExportConfig.logQueries;
33dcceba
RG
1782#endif
1783
70fb28d9 1784 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
1785
1786 try {
29e6303a 1787 EDNSOptionViewMap ednsOptions;
5cc8371b 1788 bool xpfFound = false;
b40562da 1789 dc->d_ecsParsed = true;
5cc8371b 1790 dc->d_ecsFound = false;
2749c3fe 1791 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
1792 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1793 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 1794
70fb28d9 1795 if(t_pdl) {
33dcceba 1796 try {
70fb28d9 1797 if (t_pdl->d_gettag_ffi) {
f1c7929a 1798 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
1799 }
1800 else if (t_pdl->d_gettag) {
1801 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1802 }
33dcceba 1803 }
70fb28d9 1804 catch(const std::exception& e) {
33dcceba 1805 if(g_logCommonErrors)
e6a9dde5 1806 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1807 }
1808 }
1809 }
70fb28d9 1810 catch(const std::exception& e)
33dcceba
RG
1811 {
1812 if(g_logCommonErrors)
e6a9dde5 1813 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1814 }
1815 }
f52177c3
RG
1816
1817 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1818
33dcceba 1819#ifdef HAVE_PROTOBUF
63341e8d 1820 if(t_protobufServer || t_outgoingProtobufServer) {
67e31ebe 1821 dc->d_requestorId = requestorId;
590388d2 1822 dc->d_deviceId = deviceId;
02b47f43 1823 dc->d_uuid = (*t_uuidGenerator)();
4898a348 1824 }
02b47f43 1825
63341e8d 1826 if(t_protobufServer) {
02b47f43 1827 try {
02b47f43 1828
845cbf4c 1829 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
63341e8d 1830 protobufLogQuery(t_protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 1831 }
02b47f43
RG
1832 }
1833 catch(std::exception& e) {
1834 if(g_logCommonErrors)
e6a9dde5 1835 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
1836 }
1837 }
aa7929a3 1838#endif
879b3f70 1839 if(dc->d_mdp.d_header.qr) {
048f5db6 1840 g_stats.ignoredCount++;
c0f9be19
RG
1841 if(g_logCommonErrors) {
1842 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1843 }
cf14c141 1844 delete dc;
4957a608 1845 return;
879b3f70 1846 }
3abcdab2 1847 if(dc->d_mdp.d_header.opcode) {
048f5db6 1848 g_stats.ignoredCount++;
c0f9be19
RG
1849 if(g_logCommonErrors) {
1850 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1851 }
1852 delete dc;
1853 return;
1854 }
1855 else if (dh->qdcount == 0) {
1856 g_stats.emptyQueriesCount++;
1857 if(g_logCommonErrors) {
1858 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
1859 }
cf14c141 1860 delete dc;
3abcdab2
PD
1861 return;
1862 }
09e6702a 1863 else {
4957a608
BH
1864 ++g_stats.qcounter;
1865 ++g_stats.tcpqcounter;
50a5ef72 1866 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1867 return;
09e6702a
BH
1868 }
1869 }
1870 }
1871}
1872
6dcd28c3 1873//! Handle new incoming TCP connection
d187038c 1874static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1875{
37d3f960 1876 ComboAddress addr;
09e6702a 1877 socklen_t addrlen=sizeof(addr);
a683e8bd 1878 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1879 if(newsock>=0) {
85c32340
BH
1880 if(MT->numProcesses() > g_maxMThreads) {
1881 g_stats.overCapacityDrops++;
a7b68ae7
RG
1882 try {
1883 closesocket(newsock);
1884 }
1885 catch(const PDNSException& e) {
e6a9dde5 1886 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 1887 }
85c32340
BH
1888 return;
1889 }
1890
92011b8f 1891 if(t_remotes)
1892 t_remotes->push_back(addr);
49a699c4 1893 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1894 if(!g_quiet)
e6a9dde5 1895 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1896
09e6702a 1897 g_stats.unauthorizedTCP++;
a7b68ae7
RG
1898 try {
1899 closesocket(newsock);
1900 }
1901 catch(const PDNSException& e) {
e6a9dde5 1902 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 1903 }
09e6702a
BH
1904 return;
1905 }
bd0289fc 1906 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1907 g_stats.tcpClientOverflow++;
a7b68ae7
RG
1908 try {
1909 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1910 }
1911 catch(const PDNSException& e) {
e6a9dde5 1912 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 1913 }
09e6702a
BH
1914 return;
1915 }
3ddb9247 1916
3897b9e1 1917 setNonBlocking(newsock);
f26bf547 1918 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 1919 tc->state=TCPConnection::BYTE0;
3ddb9247 1920
cd989c87 1921 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1922
0bff046b 1923 struct timeval now;
c038218b 1924 Utility::gettimeofday(&now, 0);
cd989c87 1925 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1926 }
1927}
3ddb9247 1928
d187038c 1929static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1930{
183eb877 1931 gettimeofday(&g_now, 0);
b71b60ee 1932 struct timeval diff = g_now - tv;
1933 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1934
22cf1fda 1935 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1936 g_stats.tooOldDrops++;
1937 return 0;
1938 }
1939
1bc3c142 1940 ++g_stats.qcounter;
d7f10541
BH
1941 if(fromaddr.sin4.sin_family==AF_INET6)
1942 g_stats.ipv6qcounter++;
1bc3c142
BH
1943
1944 string response;
93f0da94 1945 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 1946 unsigned int ctag=0;
f57486f1 1947 uint32_t qhash = 0;
12aff2e5 1948 bool needECS = false;
5cc8371b 1949 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 1950 std::vector<std::string> policyTags;
5fd2577f 1951 LuaContext::LuaObject data;
5cc8371b
RG
1952 ComboAddress source = fromaddr;
1953 ComboAddress destination = destaddr;
67e31ebe 1954 string requestorId;
590388d2 1955 string deviceId;
16bbc6e3 1956 bool logQuery = false;
12aff2e5 1957#ifdef HAVE_PROTOBUF
02b47f43 1958 boost::uuids::uuid uniqueId;
02b47f43 1959 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1960 if (checkProtobufExport(luaconfsLocal)) {
4898a348 1961 uniqueId = (*t_uuidGenerator)();
02b47f43 1962 needECS = true;
63341e8d 1963 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
02b47f43
RG
1964 uniqueId = (*t_uuidGenerator)();
1965 }
16bbc6e3 1966 logQuery = t_protobufServer && luaconfsLocal->protobufExportConfig.logQueries;
f1c7929a 1967 bool logResponse = t_protobufServer && luaconfsLocal->protobufExportConfig.logResponses;
12aff2e5 1968#endif
b40562da
RG
1969 EDNSSubnetOpts ednssubnet;
1970 bool ecsFound = false;
1971 bool ecsParsed = false;
70fb28d9
RG
1972 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
1973 bool variable = false;
1bc3c142 1974 try {
02b47f43
RG
1975 DNSName qname;
1976 uint16_t qtype=0;
1977 uint16_t qclass=0;
1bc3c142 1978 uint32_t age;
c15ff3df 1979 bool qnameParsed=false;
8f7473d7 1980#ifdef MALLOC_TRACE
1981 /*
1982 static uint64_t last=0;
1983 if(!last)
1984 g_mtracer->clearAllocators();
1985 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1986 last=g_mtracer->getAllocs();
1987 cout<<g_mtracer->topAllocatorsString()<<endl;
1988 g_mtracer->clearAllocators();
1989 */
1990#endif
55a1378f 1991
70fb28d9 1992 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 1993 try {
29e6303a 1994 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
1995 bool xpfFound = false;
1996
1997 ecsFound = false;
1998
1999 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2000 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2001 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2002
c15ff3df
RG
2003 qnameParsed = true;
2004 ecsParsed = true;
12aff2e5 2005
70fb28d9 2006 if(t_pdl) {
12aff2e5 2007 try {
70fb28d9 2008 if (t_pdl->d_gettag_ffi) {
f1c7929a 2009 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
70fb28d9
RG
2010 }
2011 else if (t_pdl->d_gettag) {
2012 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2013 }
12aff2e5 2014 }
70fb28d9 2015 catch(const std::exception& e) {
12aff2e5 2016 if(g_logCommonErrors)
e6a9dde5 2017 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2018 }
8ea8c302 2019 }
b2eacd67 2020 }
70fb28d9 2021 catch(const std::exception& e)
b2eacd67 2022 {
2023 if(g_logCommonErrors)
e6a9dde5 2024 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2025 }
12ce523e 2026 }
3ddb9247 2027
02b47f43 2028 bool cacheHit = false;
1fbc6dc5 2029 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2030#ifdef HAVE_PROTOBUF
63341e8d 2031 if(t_protobufServer) {
d362f7c1 2032 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
845cbf4c 2033 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
63341e8d 2034 protobufLogQuery(t_protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
b790ef3d 2035 }
d9d3f9c1
RG
2036 }
2037#endif /* HAVE_PROTOBUF */
02b47f43 2038
70fb28d9
RG
2039 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2040 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2041 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2042 vState valState;
c15ff3df 2043 if (qnameParsed) {
8467ec26 2044 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2045 }
2046 else {
8467ec26 2047 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2048 }
2049
d9d3f9c1 2050 if (cacheHit) {
8467ec26
KM
2051 if(valState == Bogus) {
2052 if(t_bogusremotes)
2053 t_bogusremotes->push_back(source);
2054 if(t_bogusqueryring)
2055 t_bogusqueryring->push_back(make_pair(qname, qtype));
2056 }
2057
d9d3f9c1 2058#ifdef HAVE_PROTOBUF
845cbf4c 2059 if(t_protobufServer && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2060 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2061 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2062 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2063 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2064 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2065 pbMessage->setRequestorId(requestorId);
2066 pbMessage->setDeviceId(deviceId);
63341e8d 2067 protobufLogResponse(t_protobufServer, *pbMessage);
02b47f43 2068 }
d9d3f9c1 2069#endif /* HAVE_PROTOBUF */
49a3500d 2070 if(!g_quiet)
e6a9dde5 2071 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2072
1bc3c142
BH
2073 g_stats.packetCacheHits++;
2074 SyncRes::s_queries++;
2075 ageDNSPacket(response, age);
b71b60ee 2076 struct msghdr msgh;
2077 struct iovec iov;
2078 char cbuf[256];
2079 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2080 msgh.msg_control=NULL;
2081
cbc03320 2082 if(g_fromtosockets.count(fd)) {
fbe2a2e0 2083 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 2084 }
cbc03320 2085 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2086 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2087
97bee66d 2088 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2089 struct dnsheader tmpdh;
2090 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2091 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2092 }
08f3f638 2093 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2094 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2095 return 0;
2096 }
3ddb9247 2097 }
1bc3c142 2098 catch(std::exception& e) {
e6a9dde5 2099 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2100 return 0;
2101 }
3ddb9247 2102
f26bf547 2103 if(t_pdl) {
5cc8371b 2104 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2105 if(!g_quiet)
e6a9dde5 2106 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2107 g_stats.policyDrops++;
2108 return 0;
2109 }
2110 }
2111
1bc3c142 2112 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2113 if(!g_quiet)
e6a9dde5 2114 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2115
1bc3c142
BH
2116 g_stats.overCapacityDrops++;
2117 return 0;
2118 }
3ddb9247 2119
5164bac3 2120 DNSComboWriter* dc = new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data));
1bc3c142 2121 dc->setSocket(fd);
49a3500d 2122 dc->d_tag=ctag;
e9f63d47 2123 dc->d_qhash=qhash;
5cc8371b
RG
2124 dc->setRemote(fromaddr);
2125 dc->setSource(source);
b71b60ee 2126 dc->setLocal(destaddr);
5cc8371b 2127 dc->setDestination(destination);
1bc3c142 2128 dc->d_tcp=false;
b40562da
RG
2129 dc->d_ecsFound = ecsFound;
2130 dc->d_ecsParsed = ecsParsed;
2131 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2132 dc->d_ttlCap = ttlCap;
2133 dc->d_variable = variable;
aa7929a3 2134#ifdef HAVE_PROTOBUF
63341e8d 2135 if (t_protobufServer || t_outgoingProtobufServer) {
5164bac3 2136 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2137 }
67e31ebe 2138 dc->d_requestorId = requestorId;
590388d2 2139 dc->d_deviceId = deviceId;
aa7929a3
RG
2140#endif
2141
1bc3c142
BH
2142 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
2143 return 0;
3ddb9247
PD
2144}
2145
b71b60ee 2146
d187038c 2147static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2148{
a683e8bd 2149 ssize_t len;
12c2f2b9 2150 static const size_t maxIncomingQuerySize = 512;
04896b99 2151 static thread_local std::string data;
5db529f8 2152 ComboAddress fromaddr;
b71b60ee 2153 struct msghdr msgh;
2154 struct iovec iov;
2155 char cbuf[256];
390f1dab 2156 bool firstQuery = true;
b71b60ee 2157
c0a00acd
RG
2158 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2159 data.resize(maxIncomingQuerySize);
2160 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2161 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2162
c0a00acd 2163 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2164
c0a00acd 2165 firstQuery = false;
390f1dab 2166
c0a00acd
RG
2167 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2168 g_stats.ignoredCount++;
2169 if (!g_quiet) {
2170 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2171 }
2172 return;
04896b99 2173 }
04896b99 2174
c0a00acd
RG
2175 if (msgh.msg_flags & MSG_TRUNC) {
2176 g_stats.truncatedDrops++;
2177 if (!g_quiet) {
2178 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2179 }
2180 return;
ba892c7f 2181 }
b23b8614 2182
c0a00acd
RG
2183 if(t_remotes) {
2184 t_remotes->push_back(fromaddr);
2185 }
81859ba5 2186
c0a00acd
RG
2187 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2188 if(!g_quiet) {
2189 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2190 }
3ddb9247 2191
c0a00acd
RG
2192 g_stats.unauthorizedUDP++;
2193 return;
5db529f8 2194 }
c0a00acd
RG
2195 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2196 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2197 if(!g_quiet) {
2198 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2199 }
2200
2201 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2202 return;
3abcdab2 2203 }
c0a00acd
RG
2204
2205 try {
2206 data.resize(static_cast<size_t>(len));
2207 dnsheader* dh=(dnsheader*)&data[0];
2208
2209 if(dh->qr) {
2210 g_stats.ignoredCount++;
2211 if(g_logCommonErrors) {
2212 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2213 }
2214 }
2215 else if(dh->opcode) {
2216 g_stats.ignoredCount++;
2217 if(g_logCommonErrors) {
2218 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2219 }
a6147cd2 2220 }
c0f9be19
RG
2221 else if (dh->qdcount == 0) {
2222 g_stats.emptyQueriesCount++;
2223 if(g_logCommonErrors) {
2224 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2225 }
2226 }
a6147cd2 2227 else {
c0a00acd
RG
2228 struct timeval tv={0,0};
2229 HarvestTimestamp(&msgh, &tv);
2230 ComboAddress dest;
2231 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2232 auto loc = rplookup(g_listenSocketsAddresses, fd);
2233 if(HarvestDestinationAddress(&msgh, &dest)) {
2234 // but.. need to get port too
2235 if(loc) {
2236 dest.sin4.sin_port = loc->sin4.sin_port;
2237 }
a6147cd2 2238 }
2239 else {
c0a00acd
RG
2240 if(loc) {
2241 dest = *loc;
2242 }
2243 else {
2244 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2245 socklen_t slen = dest.getSocklen();
2246 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2247 }
2248 }
2249
2250 if(g_weDistributeQueries) {
2251 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2252 }
2253 else {
2254 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2255 }
2256 }
c0a00acd
RG
2257 }
2258 catch(const MOADNSException& mde) {
2259 g_stats.clientParseError++;
2260 if(g_logCommonErrors) {
2261 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2262 }
2263 }
2264 catch(const std::runtime_error& e) {
2265 g_stats.clientParseError++;
2266 if(g_logCommonErrors) {
2267 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2268 }
5db529f8
BH
2269 }
2270 }
c0a00acd
RG
2271 else {
2272 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2273 if(firstQuery && errno == EAGAIN) {
2274 g_stats.noPacketError++;
2275 }
390f1dab 2276
c0a00acd
RG
2277 break;
2278 }
ac0e821b 2279 }
5db529f8
BH
2280}
2281
adb6cd72 2282static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2283{
37d3f960 2284 int fd;
f28307ad 2285 vector<string>locals;
2e3d8a19 2286 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2287
f28307ad 2288 if(locals.empty())
3f81d239 2289 throw PDNSException("No local address specified");
3ddb9247 2290
f28307ad 2291 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2292 ServiceTuple st;
2293 st.port=::arg().asNum("local-port");
2294 parseService(*i, st);
3ddb9247 2295
32252594
BH
2296 ComboAddress sin;
2297
d38e2ba9 2298 sin.reset();
37d3f960 2299 sin.sin4.sin_family = AF_INET;
32252594 2300 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2301 sin.sin6.sin6_family = AF_INET6;
f71bc087 2302 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2303 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2304 }
2305
2306 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2307 if(fd<0)
3f81d239 2308 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2309
3897b9e1 2310 setCloseOnExec(fd);
a903b39c 2311
f28307ad 2312 int tmp=1;
810ff705 2313 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2314 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2315 exit(1);
f28307ad 2316 }
0dfa94ab 2317 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2318 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2319 }
2320
c8ddb7c2 2321#ifdef TCP_DEFER_ACCEPT
38ac0821 2322 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2323 if(i==locals.begin())
e6a9dde5 2324 g_log<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2325 }
2326#endif
2327
fec7dd5a
SS
2328 if( ::arg().mustDo("non-local-bind") )
2329 Utility::setBindAny(AF_INET, fd);
2330
2332f42d 2331#ifdef SO_REUSEPORT
810ff705
RG
2332 if(g_reusePort) {
2333 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2334 throw PDNSException("SO_REUSEPORT: "+stringerror());
2335 }
2336#endif
2337
0735b17e
RG
2338 if (::arg().asNum("tcp-fast-open") > 0) {
2339#ifdef TCP_FASTOPEN
2340 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2341 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2342 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2343 }
2344#else
e6a9dde5 2345 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2346#endif
2347 }
2348
32252594 2349 sin.sin4.sin_port = htons(st.port);
a683e8bd 2350 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2351 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2352 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2353
3897b9e1 2354 setNonBlocking(fd);
49a699c4 2355 setSocketSendBuffer(fd, 65000);
37d3f960 2356 listen(fd, 128);
b243ca3b 2357 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2358 tcpSockets.insert(fd);
2359
84433b79 2360 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2361 // - fd is not that which we know here, but returned from accept()
3ddb9247 2362 if(sin.sin4.sin_family == AF_INET)
e6a9dde5 2363 g_log<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2364 else
e6a9dde5 2365 g_log<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2366 }
9c495589
BH
2367}
2368
b243ca3b 2369static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2370{
fec7dd5a 2371 int one=1;
f28307ad 2372 vector<string>locals;
2e3d8a19 2373 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2374
f28307ad 2375 if(locals.empty())
3f81d239 2376 throw PDNSException("No local address specified");
3ddb9247 2377
f28307ad 2378 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2379 ServiceTuple st;
2380 st.port=::arg().asNum("local-port");
2381 parseService(*i, st);
2382
37d3f960 2383 ComboAddress sin;
996c89cc 2384
d38e2ba9 2385 sin.reset();
37d3f960 2386 sin.sin4.sin_family = AF_INET;
32252594 2387 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2388 sin.sin6.sin6_family = AF_INET6;
f71bc087 2389 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2390 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2391 }
3ddb9247 2392
bb4bdbaf 2393 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2394 if(fd < 0) {
3f81d239 2395 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2396 }
915b0c39 2397 if (!setSocketTimestamps(fd))
e6a9dde5 2398 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2399
b71b60ee 2400 if(IsAnyAddress(sin)) {
cbc03320 2401 if(sin.sin4.sin_family == AF_INET)
2402 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2403 g_fromtosockets.insert(fd);
757d3179 2404#ifdef IPV6_RECVPKTINFO
cbc03320 2405 if(sin.sin4.sin_family == AF_INET6)
2406 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2407 g_fromtosockets.insert(fd);
757d3179 2408#endif
0dfa94ab 2409 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2410 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2411 }
b71b60ee 2412 }
fec7dd5a
SS
2413 if( ::arg().mustDo("non-local-bind") )
2414 Utility::setBindAny(AF_INET6, fd);
2415
3897b9e1 2416 setCloseOnExec(fd);
a903b39c 2417
4e9a20e6 2418 setSocketReceiveBuffer(fd, 250000);
32252594 2419 sin.sin4.sin_port = htons(st.port);
37d3f960 2420
2332f42d 2421
2573d4a6 2422#ifdef SO_REUSEPORT
810ff705 2423 if(g_reusePort) {
2332f42d 2424 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2425 throw PDNSException("SO_REUSEPORT: "+stringerror());
2426 }
2427#endif
a683e8bd 2428 socklen_t socklen=sin.getSocklen();
3ddb9247 2429 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2430 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2431
3897b9e1 2432 setNonBlocking(fd);
c2136bf0 2433
b243ca3b 2434 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2435 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2436 if(sin.sin4.sin_family == AF_INET)
e6a9dde5 2437 g_log<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2438 else
e6a9dde5 2439 g_log<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2440 }
c836dc19 2441}
caa6eefa 2442
d187038c 2443static void daemonize(void)
c836dc19
BH
2444{
2445 if(fork())
2446 exit(0); // bye bye
3ddb9247
PD
2447
2448 setsid();
c836dc19 2449
27a5ead5 2450 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2451 if(i < 0)
e6a9dde5 2452 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2453 else {
2454 dup2(i,0); /* stdin */
2455 dup2(i,1); /* stderr */
2456 dup2(i,2); /* stderr */
2457 close(i);
2458 }
288f4aa9 2459}
caa6eefa 2460
d187038c 2461static void usr1Handler(int)
c75a6a9e
BH
2462{
2463 statsWanted=true;
2464}
ae1b2e98 2465
d187038c 2466static void usr2Handler(int)
9170fbaf 2467{
f1f34cc2 2468 g_quiet= !g_quiet;
2469 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2470 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2471}
2472
d187038c 2473static void doStats(void)
c75a6a9e 2474{
16beeaa4
BH
2475 static time_t lastOutputTime;
2476 static uint64_t lastQueryCount;
d299d4f5 2477
2478 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2479 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2480
d299d4f5 2481 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2482 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2483 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2484 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2485 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2486
e6a9dde5 2487 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2488 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2489 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2490 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2491 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2492 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2493 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2494 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2495
e6a9dde5 2496 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2497 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2498
e6a9dde5 2499 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2500 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2501
16beeaa4
BH
2502 time_t now = time(0);
2503 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2504 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2505 }
2506 lastOutputTime = now;
2507 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2508 }
3ddb9247 2509 else if(statsWanted)
e6a9dde5 2510 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2511
c75a6a9e
BH
2512 statsWanted=false;
2513}
c836dc19 2514
29f0b1ce 2515static void houseKeeping(void *)
c836dc19 2516{
cb1523d1 2517 static thread_local time_t last_rootupdate, last_prune, last_secpoll;
3337c2f7
RG
2518 static thread_local int cleanCounter=0;
2519 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
cc59bce6 2520 try {
2521 if(s_running)
2522 return;
2523 s_running=true;
3ddb9247 2524
cc59bce6 2525 struct timeval now;
2526 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2527
2528 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 2529 DTime dt;
2530 dt.setTimeval(now);
a6f7f5fe 2531 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2532 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2533
a6f7f5fe 2534 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2535
cc59bce6 2536 if(!((cleanCounter++)%40)) { // this is a full scan!
2537 time_t limit=now.tv_sec-300;
a712cb56 2538 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2539 }
2540 last_prune=time(0);
d67620e4 2541 }
3ddb9247 2542
cc59bce6 2543 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2544 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2545 if (!res)
2546 last_rootupdate=now.tv_sec;
cc59bce6 2547 }
3ddb9247 2548
b243ca3b 2549 if(isHandlerThread()) {
3ddb9247 2550
cc59bce6 2551 if(now.tv_sec - last_secpoll >= 3600) {
2552 try {
2553 doSecPoll(&last_secpoll);
2554 }
581d4ea3 2555 catch(std::exception& e)
2556 {
e6a9dde5 2557 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2558 }
47e9b74f 2559 catch(PDNSException& e)
2560 {
e6a9dde5 2561 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2562 }
d0992a65
CH
2563 catch(ImmediateServFailException &e)
2564 {
e6a9dde5 2565 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2566 }
47e9b74f 2567 catch(...)
2568 {
e6a9dde5 2569 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2570 }
18b73338 2571 }
d67620e4 2572 }
cc59bce6 2573 s_running=false;
d67620e4 2574 }
cc59bce6 2575 catch(PDNSException& ae)
2576 {
2577 s_running=false;
e6a9dde5 2578 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2579 throw;
2580 }
779828c4 2581}
d6d5dea7 2582
d187038c 2583static void makeThreadPipes()
49a699c4 2584{
b243ca3b
RG
2585 /* thread 0 is the handler / SNMP, we start at 1 */
2586 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2587 auto& threadInfos = s_threadInfos.at(n);
2588
49a699c4
BH
2589 int fd[2];
2590 if(pipe(fd) < 0)
2591 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2592
b243ca3b
RG
2593 threadInfos.pipes.readToThread = fd[0];
2594 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2595
49a699c4
BH
2596 if(pipe(fd) < 0)
2597 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2598
2599 threadInfos.pipes.readFromThread = fd[0];
2600 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2601
cf8cda18
RG
2602 if(pipe(fd) < 0)
2603 unixDie("Creating pipe for inter-thread communications");
d10307c5 2604
b243ca3b
RG
2605 threadInfos.pipes.readQueriesToThread = fd[0];
2606 threadInfos.pipes.writeQueriesToThread = fd[1];
2607
2608 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2609 unixDie("Making pipe for inter-thread communications non-blocking");
2610 }
49a699c4
BH
2611 }
2612}
2613
00c9b8c1
BH
2614struct ThreadMSG
2615{
2616 pipefunc_t func;
2617 bool wantAnswer;
2618};
2619
b4e76a18 2620void broadcastFunction(const pipefunc_t& func)
49a699c4 2621{
b243ca3b
RG
2622 /* This function might be called by the worker with t_id 0 during startup
2623 for the initialization of ACLs and domain maps. After that it should only
2624 be called by the handler. */
d77abca1 2625
b243ca3b
RG
2626 if (s_threadInfos.empty() && isHandlerThread()) {
2627 /* the handler and distributors will call themselves below, but
2628 during startup we get called while s_threadInfos has not been
2629 populated yet to update the ACL or domain maps, so we need to
2630 handle that case.
2631 */
2632 func();
2633 }
b4e76a18 2634
b243ca3b
RG
2635 unsigned int n = 0;
2636 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2637 if(n++ == t_id) {
b4e76a18 2638 func(); // don't write to ourselves!
49a699c4
BH
2639 continue;
2640 }
3ddb9247 2641
00c9b8c1
BH
2642 ThreadMSG* tmsg = new ThreadMSG();
2643 tmsg->func = func;
2644 tmsg->wantAnswer = true;
b243ca3b 2645 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2646 delete tmsg;
b243ca3b 2647
49a699c4 2648 unixDie("write to thread pipe returned wrong size or error");
b841314c 2649 }
3ddb9247 2650
49467864 2651 string* resp = nullptr;
b243ca3b 2652 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2653 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2654
49a699c4 2655 if(resp) {
49a699c4 2656 delete resp;
49467864 2657 resp = nullptr;
49a699c4
BH
2658 }
2659 }
2660}
06ea9015 2661
b243ca3b 2662// This function is only called by the distributor threads, when pdns-distributes-queries is set
8171ab83 2663void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 2664{
b243ca3b 2665 if (!isDistributorThread()) {
d77abca1
RG
2666 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2667 exit(1);
2668 }
2669
8171ab83 2670 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
b243ca3b 2671 unsigned int target = /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
06ea9015 2672
b243ca3b
RG
2673 const auto& targetInfo = s_threadInfos[target];
2674 if(!targetInfo.isWorker) {
2675 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2676 exit(1);
00c9b8c1 2677 }
d77abca1 2678
b243ca3b 2679 const auto& tps = targetInfo.pipes;
00c9b8c1
BH
2680 ThreadMSG* tmsg = new ThreadMSG();
2681 tmsg->func = func;
2682 tmsg->wantAnswer = false;
3ddb9247 2683
cf8cda18
RG
2684 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2685 if (written > 0) {
2686 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2687 delete tmsg;
2688 unixDie("write to thread pipe returned wrong size or error");
2689 }
2690 }
2691 else {
2692 int error = errno;
b841314c 2693 delete tmsg;
cf8cda18
RG
2694 if (error == EAGAIN || error == EWOULDBLOCK) {
2695 g_stats.queryPipeFullDrops++;
2696 } else {
17634427 2697 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2698 }
b841314c 2699 }
00c9b8c1 2700}
3427fa8a 2701
d187038c 2702static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2703{
f26bf547 2704 ThreadMSG* tmsg = nullptr;
3ddb9247 2705
cf8cda18 2706 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
2707 unixDie("read from thread pipe returned wrong size or error");
2708 }
3ddb9247 2709
2f22827a 2710 void *resp=0;
2711 try {
2712 resp = tmsg->func();
2713 }
2714 catch(std::exception& e) {
6d2010a8 2715 if(g_logCommonErrors)
e6a9dde5 2716 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2717 }
2718 catch(PDNSException& e) {
6d2010a8 2719 if(g_logCommonErrors)
e6a9dde5 2720 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2721 }
d7c676a5 2722 if(tmsg->wantAnswer) {
b243ca3b
RG
2723 const auto& threadInfo = s_threadInfos.at(t_id);
2724 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 2725 delete tmsg;
00c9b8c1 2726 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
2727 }
2728 }
3ddb9247 2729
00c9b8c1 2730 delete tmsg;
49a699c4 2731}
09e6702a 2732
13034931
BH
2733template<class T> void *voider(const boost::function<T*()>& func)
2734{
2735 return func();
2736}
2737
b3b5459d
BH
2738vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2739{
2740 a.insert(a.end(), b.begin(), b.end());
2741 return a;
2742}
2743
/* Appends all (string, uint16_t) pairs of b to a and returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >& a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2749
3ddb9247
PD
2750vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2751{
2752 a.insert(a.end(), b.begin(), b.end());
2753 return a;
2754}
2755
92011b8f 2756
387b9ca6
RG
2757/*
2758 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
2759 reload the Lua script (not the Lua config) or change the current trace regex,
2760 and by the SNMP thread to gather metrics. */
b4e76a18 2761template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 2762{
b243ca3b 2763 if (!isHandlerThread()) {
788eeb4c 2764 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 2765 exit(1);
d77abca1
RG
2766 }
2767
b243ca3b 2768 unsigned int n = 0;
3427fa8a 2769 T ret=T();
b243ca3b
RG
2770 for (const auto& threadInfo : s_threadInfos) {
2771 if (n++ == t_id) {
2772 continue;
2773 }
2774
2775 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
2776 ThreadMSG* tmsg = new ThreadMSG();
2777 tmsg->func = boost::bind(voider<T>, func);
2778 tmsg->wantAnswer = true;
3ddb9247 2779
b841314c
RG
2780 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2781 delete tmsg;
3427fa8a 2782 unixDie("write to thread pipe returned wrong size or error");
b841314c 2783 }
3ddb9247 2784
49467864 2785 T* resp = nullptr;
3427fa8a
BH
2786 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2787 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2788
3427fa8a 2789 if(resp) {
3427fa8a
BH
2790 ret += *resp;
2791 delete resp;
49467864 2792 resp = nullptr;
3427fa8a
BH
2793 }
2794 }
2795 return ret;
2796}
2797
b4e76a18
RG
2798template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
2799template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
2800template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
2801template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3427fa8a 2802
d187038c 2803static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2804{
2805 string remote;
2806 string msg=s_rcc.recv(&remote);
2807 RecursorControlParser rcp;
2808 RecursorControlParser::func_t* command;
3ddb9247 2809
09e6702a 2810 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2811
2812 // If we are inside a chroot, we need to strip
2813 if (!arg()["chroot"].empty()) {
a683e8bd 2814 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2815 remote = remote.substr(len);
2816 }
2817
ab5c053d
BH
2818 try {
2819 s_rcc.send(answer, &remote);
2820 command();
2821 }
fdbf35ac 2822 catch(std::exception& e) {
e6a9dde5 2823 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 2824 }
3f81d239 2825 catch(PDNSException& ae) {
e6a9dde5 2826 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 2827 }
09e6702a
BH
2828}
2829
d187038c 2830static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2831{
0b18b22e 2832 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2833 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2834
667f7e60 2835 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2836
a683e8bd 2837 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2838 if(ret > 0) {
667f7e60 2839 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2840 pident->inNeeded-=(size_t)ret;
825fa717 2841 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2842 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2843 PacketID pid=*pident;
2844 string msg=pident->inMSG;
3ddb9247 2845
bb4bdbaf 2846 t_fdm->removeReadFD(fd);
3ddb9247 2847 MT->sendEvent(pid, &msg);
09e6702a
BH
2848 }
2849 else {
667f7e60 2850 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2851 }
2852 }
2853 else {
667f7e60 2854 PacketID tmp=*pident;
bb4bdbaf 2855 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2856 string empty;
2857 MT->sendEvent(tmp, &empty); // this conveys error status
2858 }
2859}
2860
d187038c 2861static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2862{
0b18b22e 2863 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 2864 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 2865 if(ret > 0) {
a683e8bd 2866 pid->outPos+=(ssize_t)ret;
667f7e60
BH
2867 if(pid->outPos==pid->outMSG.size()) {
2868 PacketID tmp=*pid;
bb4bdbaf 2869 t_fdm->removeWriteFD(fd);
09e6702a
BH
2870 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2871 }
2872 }
2873 else { // error or EOF
667f7e60 2874 PacketID tmp(*pid);
bb4bdbaf 2875 t_fdm->removeWriteFD(fd);
09e6702a 2876 string sent;
998a4334 2877 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
2878 }
2879}
2880
34801ab1 2881// resend event to everybody chained onto it
d187038c 2882static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
2883{
2884 if(iter->key.chain.empty())
2885 return;
e27e91a8 2886 // cerr<<"doResends called!\n";
34801ab1
BH
2887 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2888 resend.fd=-1;
2889 resend.id=*i;
e27e91a8 2890 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 2891
34801ab1
BH
2892 MT->sendEvent(resend, &content);
2893 g_stats.chainResends++;
34801ab1
BH
2894 }
2895}
2896
d187038c 2897static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2898{
600fc20b 2899 PacketID pid=any_cast<PacketID>(var);
a683e8bd 2900 ssize_t len;
fae8fe07
RG
2901 std::string packet;
2902 packet.resize(g_outgoingEDNSBufsize);
996c89cc 2903 ComboAddress fromaddr;
09e6702a
BH
2904 socklen_t addrlen=sizeof(fromaddr);
2905
fae8fe07 2906 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 2907
a683e8bd 2908 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 2909 if(len < 0)
996c89cc 2910 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 2911 else {
3ddb9247 2912 g_stats.serverParseError++;
09e6702a 2913 if(g_logCommonErrors)
e6a9dde5 2914 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 2915 ": packet smaller than DNS header"<<endl;
998a4334 2916 }
34801ab1 2917
49a699c4 2918 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
2919 string empty;
2920
2921 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 2922 if(iter != MT->d_waiters.end())
34801ab1 2923 doResends(iter, pid, empty);
3ddb9247 2924
34801ab1 2925 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 2926 return;
3ddb9247 2927 }
998a4334 2928
fae8fe07 2929 packet.resize(len);
998a4334 2930 dnsheader dh;
fae8fe07 2931 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 2932
6da3b3ad
PD
2933 PacketID pident;
2934 pident.remote=fromaddr;
2935 pident.id=dh.id;
2936 pident.fd=fd;
34801ab1 2937
33a928af 2938 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 2939 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
2940 }
2941
2942 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2943 !dh.qr) { // one weird server
2944 pident.domain.clear();
2945 pident.type = 0;
2946 }
2947 else {
2948 try {
0b31e67e 2949 if(len > 12)
fae8fe07 2950 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
2951 }
2952 catch(std::exception& e) {
2953 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 2954 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 2955 return;
34801ab1 2956 }
6da3b3ad 2957 }
34801ab1 2958
6da3b3ad
PD
2959 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2960 if(iter != MT->d_waiters.end()) {
2961 doResends(iter, pident, packet);
2962 }
c1da7976 2963
6da3b3ad 2964retryWithName:
4957a608 2965
6da3b3ad
PD
2966 if(!MT->sendEvent(pident, &packet)) {
2967 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2968 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2969 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 2970 pident.domain == mthread->key.domain) {
6da3b3ad 2971 mthread->key.nearMisses++;
998a4334 2972 }
6da3b3ad
PD
2973
2974 // be a bit paranoid here since we're weakening our matching
3ddb9247 2975 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
2976 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2977 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2978 pident.domain = mthread->key.domain;
2979 pident.type = mthread->key.type;
2980 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 2981 }
09e6702a 2982 }
6da3b3ad
PD
2983 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2984 if(g_logCommonErrors) {
e6a9dde5 2985 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 2986 }
09e6702a 2987 }
6da3b3ad
PD
2988 else if(fd >= 0) {
2989 t_udpclientsocks->returnSocket(fd);
2990 }
09e6702a
BH
2991}
2992
1f4abb20
BH
2993FDMultiplexer* getMultiplexer()
2994{
2995 FDMultiplexer* ret;
f26bf547 2996 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 2997 try {
f26bf547 2998 ret=i.second();
1f4abb20
BH
2999 return ret;
3000 }
98d0ee4a 3001 catch(FDMultiplexerException &fe) {
e6a9dde5 3002 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3003 }
3004 catch(...) {
e6a9dde5 3005 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3006 }
1f4abb20 3007 }
e6a9dde5 3008 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3009 exit(1);
3010}
3011
3ddb9247 3012
d187038c 3013static string* doReloadLuaScript()
4485aa35 3014{
674cf0f6 3015 string fname= ::arg()["lua-dns-script"];
4485aa35 3016 try {
674cf0f6 3017 if(fname.empty()) {
f26bf547 3018 t_pdl.reset();
e6a9dde5 3019 g_log<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3020 return new string("unloaded\n");
4485aa35
BH
3021 }
3022 else {
9694e14f
AT
3023 t_pdl = std::make_shared<RecursorLua4>();
3024 t_pdl->loadFile(fname);
4485aa35
BH
3025 }
3026 }
fdbf35ac 3027 catch(std::exception& e) {
e6a9dde5 3028 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3029 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3030 }
3ddb9247 3031
e6a9dde5 3032 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3033 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3034}
3035
49a699c4
BH
3036string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3037{
3ddb9247 3038 if(begin != end)
49a699c4 3039 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3040
0f39c1a3 3041 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3042}
49a699c4 3043
d187038c 3044static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3045try
3046{
3047 if(newRegex.empty()) {
f26bf547 3048 t_traceRegex.reset();
77499b05
BH
3049 return new string("unset\n");
3050 }
3051 else {
f26bf547 3052 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3053 return new string("ok\n");
3054 }
3055}
3f81d239 3056catch(PDNSException& ae)
77499b05
BH
3057{
3058 return new string(ae.reason+"\n");
3059}
3060
3061string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3062{
3063 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3064}
3065
4e9a20e6 3066static void checkLinuxIPv6Limits()
3067{
3068#ifdef __linux__
3069 string line;
3070 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3071 int lim=std::stoi(line);
4e9a20e6 3072 if(lim < 16384) {
e6a9dde5 3073 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3074 }
3075 }
3076#endif
3077}
36849ff2 3078static void checkOrFixFDS()
4e9a20e6 3079{
c0063e60 3080 unsigned int availFDs=getFilenumLimit();
3081 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3082
3083 if(wantFDs > availFDs) {
067ad20e 3084 unsigned int hardlimit= getFilenumLimit(true);
3085 if(hardlimit >= wantFDs) {
c0063e60 3086 setFilenumLimit(wantFDs);
e6a9dde5 3087 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3088 }
3089 else {
067ad20e 3090 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3091 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3092 g_maxMThreads = newval;
067ad20e 3093 setFilenumLimit(hardlimit);
36849ff2 3094 }
3095 }
4e9a20e6 3096}
77499b05 3097
b243ca3b 3098static void* recursorThread(unsigned int tid);
51e2144e 3099
f26bf547 3100static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3101{
3102 t_allowFrom = ng;
f26bf547 3103 return nullptr;
49a699c4
BH
3104}
3105
// command line as given to the process; parseACLs() re-parses settings from it on reload
int g_argc;
char** g_argv;
3108
18af64a8 3109void parseACLs()
f7c1d4e3 3110{
18af64a8 3111 static bool l_initialized;
3ddb9247 3112
49a699c4 3113 if(l_initialized) { // only reload configuration file on second call
18af64a8 3114 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3115 if(::arg()["config-name"]!="") {
3116 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3117 }
18af64a8 3118 cleanSlashes(configname);
3ddb9247
PD
3119
3120 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3121 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3122 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3123 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3124 ::arg().preParse(g_argc, g_argv, "include-dir");
3125
3126 // then process includes
3127 std::vector<std::string> extraConfigs;
242b90e1
AT
3128 ::arg().gatherIncludes(extraConfigs);
3129
1dc8f4d0 3130 for(const std::string& fn : extraConfigs) {
7e818521 3131 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3132 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3133 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3134 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3135 }
ca2c884c
AT
3136
3137 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3138 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3139 }
49a699c4 3140
f26bf547
RG
3141 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3142 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3143
2c95fc65
BH
3144 if(!::arg()["allow-from-file"].empty()) {
3145 string line;
2c95fc65
BH
3146 ifstream ifs(::arg()["allow-from-file"].c_str());
3147 if(!ifs) {
9c61b9d0 3148 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3149 }
3150
3151 string::size_type pos;
3152 while(getline(ifs,line)) {
3153 pos=line.find('#');
3154 if(pos!=string::npos)
3155 line.resize(pos);
3156 trim(line);
3157 if(line.empty())
3158 continue;
3159
18af64a8 3160 allowFrom->addMask(line);
2c95fc65 3161 }
e6a9dde5 3162 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3163 }
3164 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3165 vector<string> ips;
3166 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3167
e6a9dde5 3168 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3169 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3170 allowFrom->addMask(*i);
f7c1d4e3 3171 if(i!=ips.begin())
e6a9dde5
PL
3172 g_log<<Logger::Warning<<", ";
3173 g_log<<Logger::Warning<<*i;
f7c1d4e3 3174 }
e6a9dde5 3175 g_log<<Logger::Warning<<endl;
f7c1d4e3 3176 }
49a699c4 3177 else {
3ddb9247 3178 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
e6a9dde5 3179 g_log<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3180 allowFrom = nullptr;
49a699c4 3181 }
3ddb9247 3182
49a699c4 3183 g_initialAllowFrom = allowFrom;
d7dae798 3184 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3185 oldAllowFrom = nullptr;
3ddb9247 3186
49a699c4 3187 l_initialized = true;
18af64a8
BH
3188}
3189
795215f2 3190
756e82cf 3191static void setupDelegationOnly()
3192{
3193 vector<string> parts;
3194 stringtok(parts, ::arg()["delegation-only"], ", \t");
3195 for(const auto& p : parts) {
9065eb05 3196 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3197 }
3198}
795215f2 3199
8fd25133
RG
3200static std::map<unsigned int, std::set<int> > parseCPUMap()
3201{
3202 std::map<unsigned int, std::set<int> > result;
3203
3204 const std::string value = ::arg()["cpu-map"];
3205
3206 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3207 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3208 return result;
3209 }
3210
3211 std::vector<std::string> parts;
3212
3213 stringtok(parts, value, " \t");
3214
3215 for(const auto& part : parts) {
3216 if (part.find('=') == string::npos)
3217 continue;
3218
3219 try {
3220 auto headers = splitField(part, '=');
3221 trim(headers.first);
3222 trim(headers.second);
3223
3224 unsigned int threadId = pdns_stou(headers.first);
3225 std::vector<std::string> cpus;
3226
3227 stringtok(cpus, headers.second, ",");
3228
3229 for(const auto& cpu : cpus) {
3230 int cpuId = std::stoi(cpu);
3231
3232 result[threadId].insert(cpuId);
3233 }
3234 }
3235 catch(const std::exception& e) {
e6a9dde5 3236 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3237 }
3238 }
3239
3240 return result;
3241}
3242
3243static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3244{
3245 const auto& cpuMapping = cpusMap.find(n);
3246 if (cpuMapping != cpusMap.cend()) {
3247 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3248 if (rc == 0) {
e6a9dde5 3249 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3250 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3251 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3252 }
e6a9dde5 3253 g_log<<Logger::Info<<endl;
8fd25133
RG
3254 }
3255 else {
e6a9dde5 3256 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3257 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3258 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3259 }
e6a9dde5 3260 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3261 }
3262 }
3263}
3264
af1377b7
NC
3265#ifdef NOD_ENABLED
3266static void setupNODThread()
3267{
3268 if (g_nodEnabled) {
3269 t_nodDBp = std::make_shared<nod::NODDB>();
3270 try {
3271 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3272 }
3273 catch (const PDNSException& e) {
3274 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3275 _exit(1);
3276 }
3277 if (!t_nodDBp->init()) {
3278 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3279 _exit(1);
3280 }
3281 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp);
3282 t.detach();
3283 }
3284}
3285
3286void parseNODWhitelist(const std::string& wlist)
3287{
3288 vector<string> parts;
3289 stringtok(parts, wlist, ",; ");
3290 for(const auto& a : parts) {
3291 g_nodDomainWL.add(DNSName(a));
3292 }
3293}
3294
3295static void setupNODGlobal()
3296{
3297 // Setup NOD subsystem
3298 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3299 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3300 g_nodLog = ::arg().mustDo("new-domain-log");
3301 parseNODWhitelist(::arg()["new-domain-whitelist"]);
3302}
3303#endif /* NOD_ENABLED */
3304
d187038c 3305static int serviceMain(int argc, char*argv[])
18af64a8 3306{
e6a9dde5
PL
3307 g_log.setName(s_programname);
3308 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3309 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3310
3311 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3312 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3313 if(val >= 0)
e6a9dde5 3314 g_log.setFacility(val);
18af64a8 3315 else
e6a9dde5 3316 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3317 }
3318
ba1a571d 3319 showProductVersion();
3afde9b2 3320
06ea9015 3321 g_disthashseed=dns_random(0xffffffff);
3322
b7ef5828
PL
3323 checkLinuxIPv6Limits();
3324 try {
3325 vector<string> addrs;
3326 if(!::arg()["query-local-address6"].empty()) {
3327 SyncRes::s_doIPv6=true;
e6a9dde5 3328 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3329
3330 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3331 for(const string& addr : addrs) {
3332 g_localQueryAddresses6.push_back(ComboAddress(addr));
3333 }
3334 }
3335 else {
e6a9dde5 3336 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3337 }
3338 addrs.clear();
3339 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3340 for(const string& addr : addrs) {
3341 g_localQueryAddresses4.push_back(ComboAddress(addr));
3342 }
3343 }
3344 catch(std::exception& e) {
e6a9dde5 3345 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3346 exit(99);
3347 }
3348
e48c6b8a
PL
3349 // keep this ABOVE loadRecursorLuaConfig!
3350 if(::arg()["dnssec"]=="off")
3351 g_dnssecmode=DNSSECMode::Off;
3352 else if(::arg()["dnssec"]=="process-no-validate")
3353 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3354 else if(::arg()["dnssec"]=="process")
3355 g_dnssecmode=DNSSECMode::Process;
3356 else if(::arg()["dnssec"]=="validate")
3357 g_dnssecmode=DNSSECMode::ValidateAll;
3358 else if(::arg()["dnssec"]=="log-fail")
3359 g_dnssecmode=DNSSECMode::ValidateForLog;
3360 else {
e6a9dde5 3361 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3362 exit(1);
3363 }
3364
3365 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3366 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3367
a6f7f5fe 3368 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3369 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3370
3371 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3372 try {
e6ec15bf 3373 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3374 }
3375 catch (PDNSException &e) {
e6a9dde5 3376 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3377 exit(1);
3378 }
ad42489c 3379
18af64a8 3380 parseACLs();
92011b8f 3381 sortPublicSuffixList();
3382
eb5bae86 3383 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3384 vector<string> ips;
3385 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3386 ips.push_back("0.0.0.0");
3387 ips.push_back("::");
c36bc97a 3388
e6a9dde5 3389 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3390 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3391 SyncRes::addDontQuery(*i);
eb5bae86 3392 if(i!=ips.begin())
e6a9dde5
PL
3393 g_log<<Logger::Warning<<", ";
3394 g_log<<Logger::Warning<<*i;
eb5bae86 3395 }
e6a9dde5 3396 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3397 }
3398
f7c1d4e3 3399 g_quiet=::arg().mustDo("quiet");
3ddb9247 3400
b243ca3b 3401 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3402 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3403 if(g_weDistributeQueries) {
b243ca3b 3404 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3405 }
3ddb9247 3406
756e82cf 3407 setupDelegationOnly();
b33c2462 3408 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3409
77499b05
BH
3410 if(::arg()["trace"]=="fail") {
3411 SyncRes::setDefaultLogMode(SyncRes::Store);
3412 }
3413 else if(::arg().mustDo("trace")) {
3414 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3415 ::arg().set("quiet")="no";
3416 g_quiet=false;
3e9c6c0a 3417 g_dnssecLOG=true;
f7c1d4e3 3418 }
43a9b290
PL
3419 string myHostname = getHostname();
3420 if (myHostname == "UNKNOWN"){
3421 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3422 myHostname = "";
d0983bff 3423 }
3ddb9247 3424
aadceba8 3425 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3426
1051f8a9
BH
3427 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3428
f7c1d4e3 3429 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
63637fd8 3430 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3431 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3432 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3433 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3434 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3435 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3436 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3437 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3438 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3439 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3440 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3441 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3442 if(SyncRes::s_serverID.empty()) {
d0983bff 3443 SyncRes::s_serverID = myHostname;
f7c1d4e3 3444 }
3ddb9247 3445
e9f9b8ec
RG
3446 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3447 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3448
8a3a3822
RG
3449 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3450 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3451 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3452 }
3453 else {
3454 bool found = false;
3455 for (const auto& addr : g_localQueryAddresses4) {
3456 if (!IsAnyAddress(addr)) {
3457 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3458 found = true;
3459 break;
3460 }
3461 }
3462 if (!found) {
3463 for (const auto& addr : g_localQueryAddresses6) {
3464 if (!IsAnyAddress(addr)) {
3465 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3466 found = true;
3467 break;
3468 }
3469 }
3470 if (!found) {
3471 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3472 }
3473 }
3474 }
3475
2fe3354d
CH
3476 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3477 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3478 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3479
5cc8371b 3480 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3481 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3482
5b0ddd18 3483 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3484
49a699c4 3485 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3486
08f3f638 3487 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3488
f7c1d4e3 3489 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3490 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3491
3492 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3493 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3494
b3adda56
PD
3495 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3496
b243ca3b 3497 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3498 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3499 if (g_numWorkerThreads < 1) {
e6a9dde5 3500 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3501 g_numWorkerThreads = 1;
3502 }
3503
b243ca3b 3504 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3505 g_maxMThreads = ::arg().asNum("max-mthreads");
3506
00b8cadc
RG
3507 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3508
0ec489bf 3509 g_statisticsInterval = ::arg().asNum("statistics-interval");
3510
810ff705
RG
3511#ifdef SO_REUSEPORT
3512 g_reusePort = ::arg().mustDo("reuseport");
3513#endif
3514
b243ca3b 3515 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3516
b243ca3b
RG
3517 if (g_reusePort) {
3518 if (g_weDistributeQueries) {
3519 /* first thread is the handler, then distributors */
3520 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3521 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3522 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3523 makeUDPServerSockets(deferredAdds);
adb6cd72 3524 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3525 }
3526 }
3527 else {
3528 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3529 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3530 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3531 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3532 makeUDPServerSockets(deferredAdds);
adb6cd72 3533 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3534 }
810ff705
RG
3535 }
3536 }
3537 else {
c47f201b 3538 std::set<int> tcpSockets;
b243ca3b
RG
3539 /* we don't have reuseport so we can only open one socket per
3540 listening addr:port and everyone will listen on it */
3541 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3542 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3543
3544 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3545 needs to listen to the shared sockets */
3546 if (g_weDistributeQueries) {
3547 /* first thread is the handler, then distributors */
3548 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3549 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3550 }
3551 }
3552 else {
3553 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3554 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3555 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3556 }
3557 }
810ff705 3558 }
815099b2 3559
af1377b7
NC
3560#ifdef NOD_ENABLED
3561 // Setup newly observed domain globals
3562 setupNODGlobal();
3563#endif /* NOD_ENABLED */
3564
677e2a46
BH
3565 int forks;
3566 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3567 if(!fork()) // we are child
3568 break;
3569 }
3ddb9247 3570
f7c1d4e3 3571 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3572 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3573 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
3574 daemonize();
3575 }
3576 signal(SIGUSR1,usr1Handler);
3577 signal(SIGUSR2,usr2Handler);
3578 signal(SIGPIPE,SIG_IGN);
810ff705 3579
a6414fdc 3580 checkOrFixFDS();
3ddb9247 3581
d1b28475
KM
3582#ifdef HAVE_LIBSODIUM
3583 if (sodium_init() == -1) {
e6a9dde5 3584 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
3585 exit(99);
3586 }
3587#endif
3588
3afde9b2
PL
3589 openssl_thread_setup();
3590 openssl_seed();
e97cb679
AT
3591 /* setup rng before chroot */
3592 dns_random_init();
3afde9b2 3593
bdbb07e0 3594 if(::arg()["server-id"].empty()) {
d0983bff 3595 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
3596 }
3597
138435cb
BH
3598 int newgid=0;
3599 if(!::arg()["setgid"].empty())
3600 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3601 int newuid=0;
3602 if(!::arg()["setuid"].empty())
3603 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3604
f1d6a7ce
KM
3605 Utility::dropGroupPrivs(newuid, newgid);
3606
138435cb 3607 if (!::arg()["chroot"].empty()) {
75336810
PL
3608#ifdef HAVE_SYSTEMD
3609 char *ns;
3610 ns = getenv("NOTIFY_SOCKET");
3611 if (ns != nullptr) {
e6a9dde5 3612 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
3613 exit(1);
3614 }
3615#endif
138435cb 3616 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 3617 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
3618 exit(1);
3619 }
f0f3f0b0 3620 else
e6a9dde5 3621 g_log<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
3622 }
3623
f0f3f0b0
PL
3624 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3625 if(!s_pidfname.empty())
3626 unlink(s_pidfname.c_str()); // remove possible old pid file
3627 writePid();
3628
3629 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3630
f1d6a7ce 3631 Utility::dropUserPrivs(newuid);
c0063e60 3632
e6ec15bf
RG
3633 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3634
49a699c4 3635 makeThreadPipes();
3ddb9247 3636
5d4dd7fe
BH
3637 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3638 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 3639 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 3640 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 3641
d705aad9
RG
3642 if (::arg().mustDo("snmp-agent")) {
3643 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3644 g_snmpAgent->run();
3645 }
3646
b47026fd 3647 int port = ::arg().asNum("udp-source-port-min");
58da9034 3648 if(port < 1024 || port > 65535){
e6a9dde5 3649 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
3650 exit(99); // this isn't going to fix itself either
3651 }
3652 s_minUdpSourcePort = port;
b47026fd 3653 port = ::arg().asNum("udp-source-port-max");
58da9034 3654 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 3655 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
3656 exit(99); // this isn't going to fix itself either
3657 }
3658 s_maxUdpSourcePort = port;
3659 std::vector<string> parts {};
b47026fd 3660 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
3661 for (const auto &part : parts)
3662 {
3663 port = std::stoi(part);
58da9034 3664 if(port < 1024 || port > 65535){
e6a9dde5 3665 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
3666 exit(99); // this isn't going to fix itself either
3667 }
3668 s_avoidUdpSourcePorts.insert(port);
3669 }
3670
b243ca3b 3671 unsigned int currentThreadId = 1;
8fd25133 3672 const auto cpusMap = parseCPUMap();
d77abca1 3673
c3828c03 3674 if(g_numThreads == 1) {
e6a9dde5 3675 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
3676#ifdef HAVE_SYSTEMD
3677 sd_notify(0, "READY=1");
3678#endif
b243ca3b
RG
3679
3680 /* This thread handles the web server, carbon, statistics and the control channel */
3681 auto& handlerInfos = s_threadInfos.at(0);
3682 handlerInfos.isHandler = true;
3683 handlerInfos.thread = std::thread(recursorThread, 0);
3684
3685 setCPUMap(cpusMap, currentThreadId, pthread_self());
3686
3687 auto& infos = s_threadInfos.at(currentThreadId);
3688 infos.isListener = true;
3689 infos.isWorker = true;
3690 recursorThread(currentThreadId++);
76698c6e
BH
3691 }
3692 else {
8fd25133 3693
b243ca3b
RG
3694 if (g_weDistributeQueries) {
3695 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
3696 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
3697 auto& infos = s_threadInfos.at(currentThreadId);
3698 infos.isListener = true;
3699 infos.thread = std::thread(recursorThread, currentThreadId++);
3700
3701 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3702 }
3703 }
8fd25133 3704
62b549e0
RG
3705 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
3706
b243ca3b
RG
3707 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
3708 auto& infos = s_threadInfos.at(currentThreadId);
3709 infos.isListener = g_weDistributeQueries ? false : true;
3710 infos.isWorker = true;
3711 infos.thread = std::thread(recursorThread, currentThreadId++);
3712
3713 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 3714 }
b243ca3b 3715
6b6720de
PL
3716#ifdef HAVE_SYSTEMD
3717 sd_notify(0, "READY=1");
3718#endif
b243ca3b
RG
3719
3720 /* This thread handles the web server, carbon, statistics and the control channel */
3721 auto& infos = s_threadInfos.at(0);
3722 infos.isHandler = true;
3723 infos.thread = std::thread(recursorThread, 0);
3724
3725 s_threadInfos.at(0).thread.join();
bb4bdbaf 3726 }
bb4bdbaf
BH
3727 return 0;
3728}
3729
b243ca3b 3730static void* recursorThread(unsigned int n)
bb4bdbaf
BH
3731try
3732{
d77abca1 3733 t_id=n;
b243ca3b 3734 auto& threadInfo = s_threadInfos.at(t_id);
49a699c4 3735 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 3736 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 3737 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
3738 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3739 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 3740 primeHints();
3ddb9247 3741
f26bf547 3742 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 3743
aa7929a3 3744#ifdef HAVE_PROTOBUF
f26bf547 3745 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
aa7929a3 3746#endif
e6a9dde5 3747 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 3748
af1377b7
NC
3749#ifdef NOD_ENABLED
3750 setupNODThread();
3751#endif /* NOD_ENABLED */
3752
8fb594ba 3753 if(threadInfo.isWorker) {
5b388d28
PD
3754 try {
3755 if(!::arg()["lua-dns-script"].empty()) {
3756 t_pdl = std::make_shared<RecursorLua4>();
3757 t_pdl->loadFile(::arg()["lua-dns-script"]);
3758 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3759 }
3760 }
3761 catch(std::exception &e) {
3762 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3763 _exit(99);
674cf0f6 3764 }
674cf0f6 3765 }
3ddb9247 3766
f8f243b0 3767 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 3768 if(ringsize) {
f26bf547 3769 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
3770 if(g_weDistributeQueries)
3771 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 3772 else
3ddb9247 3773 t_remotes->set_capacity(ringsize);
f26bf547 3774 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3775 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
3776 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3777 t_bogusremotes->set_capacity(ringsize);
f26bf547 3778 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3779 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 3780 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3781 t_timeouts->set_capacity(ringsize);
92011b8f 3782
f26bf547 3783 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3784 t_queryring->set_capacity(ringsize);
f26bf547 3785 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3786 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
3787 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3788 t_bogusqueryring->set_capacity(ringsize);
92011b8f 3789 }
3ddb9247 3790
f26bf547 3791 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3ddb9247 3792
63341e8d
RG
3793#ifdef HAVE_PROTOBUF
3794 /* start protobuf export threads if needed */
3795 auto luaconfsLocal = g_luaconfs.getLocal();
3796 checkProtobufExport(luaconfsLocal);
3797 checkOutgoingProtobufExport(luaconfsLocal);
3798#endif /* HAVE_PROTOBUF */
3799
bb4bdbaf
BH
3800 PacketID pident;
3801
3802 t_fdm=getMultiplexer();
d77abca1 3803
b243ca3b 3804 if(threadInfo.isHandler) {
d07bf7ff 3805 if(::arg().mustDo("webserver")) {
e6a9dde5 3806 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 3807 try {
1ce57618 3808 new RecursorWebServer(t_fdm);
8989097d
CH
3809 }
3810 catch(PDNSException &e) {
e6a9dde5 3811 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
3812 exit(99);
3813 }
f3d1d67b 3814 }
e6a9dde5 3815 g_log<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 3816 }
810ff705 3817 else {
d77abca1 3818
b243ca3b
RG
3819 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
3820 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
3821
3822 if (threadInfo.isListener) {
3823 if (g_reusePort) {
3824 /* then every listener has its own FDs */
3825 for(const auto deferred : threadInfo.deferredAdds) {
3826 t_fdm->addReadFD(deferred.first, deferred.second);
3827 }
810ff705 3828 }
b243ca3b
RG
3829 else {
3830 /* otherwise all listeners are listening on the same ones */
3831 for(const auto deferred : g_deferredAdds) {
3832 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
3833 }
3834 }
3835 }
810ff705 3836 }
3ddb9247 3837
b0b37121 3838 registerAllStats();
d77abca1 3839
b243ca3b 3840 if(threadInfo.isHandler) {
674cf0f6
BH
3841 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3842 }
1bc3c142 3843
f7c1d4e3 3844 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 3845
f7c1d4e3 3846 bool listenOnTCP(true);
49a699c4 3847
cb1523d1 3848 time_t last_stat = 0;
a2f87dd1 3849 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 3850 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 3851 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 3852 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 3853 for(;;) {
ac0e821b 3854 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 3855
3427fa8a
BH
3856 if(!(counter%500)) {
3857 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
3858 }
3859
d2392145 3860 if(!(counter%55)) {
d8f6d49f 3861 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 3862 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 3863
f7c1d4e3 3864 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 3865 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 3866 if(g_logCommonErrors)
e6a9dde5 3867 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 3868 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
3869 }
3870 }
3ddb9247 3871
f7c1d4e3
BH
3872 counter++;
3873
b243ca3b 3874 if(threadInfo.isHandler) {
cb1523d1
RG
3875 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
3876 doStats();
3877 last_stat = g_now.tv_sec;
3878 }
f7c1d4e3 3879
cb1523d1 3880 Utility::gettimeofday(&g_now, 0);
2c78bd57 3881
cb1523d1
RG
3882 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
3883 MT->makeThread(doCarbonDump, 0);
3884 last_carbon = g_now.tv_sec;
3885 }
2c78bd57 3886 }
2a0276a9 3887 if (t_pdl != nullptr) {
9adbe790 3888 // lua-dns-script directive is present, call the maintenance callback if needed
b243ca3b 3889 if (threadInfo.isWorker) {
2a0276a9
CHB
3890 // Only on threads processing queries
3891 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
3892 t_pdl->maintenance();
3893 last_lua_maintenance = g_now.tv_sec;
3894 }
9adbe790 3895 }
a2f87dd1 3896 }
2c78bd57 3897
bb4bdbaf 3898 t_fdm->run(&g_now);
3ea54bf0 3899 // 'run' updates g_now for us
f7c1d4e3 3900
b243ca3b 3901 if(threadInfo.isListener) {
5c889cf5 3902 if(listenOnTCP) {
c47f201b
RG
3903 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3904 for(const auto fd : threadInfo.tcpSockets) {
3905 t_fdm->removeReadFD(fd);
b243ca3b 3906 }
c47f201b
RG
3907 listenOnTCP=false;
3908 }
f7c1d4e3 3909 }
5c889cf5 3910 else {
c47f201b
RG
3911 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3912 for(const auto fd : threadInfo.tcpSockets) {
3913 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 3914 }
c47f201b
RG
3915 listenOnTCP=true;
3916 }
f7c1d4e3
BH
3917 }
3918 }
3919 }
3920}
3f81d239 3921catch(PDNSException &ae) {
e6a9dde5 3922 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
3923 return 0;
3924}
3925catch(std::exception &e) {
e6a9dde5 3926 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
3927 return 0;
3928}
3929catch(...) {
e6a9dde5 3930 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
3931 return 0;
3932}
3933
51e2144e 3934
// Entry point of the recursor process.
//
// Stores argc/argv in globals, declares the settings below together with the
// value each one starts out with, reads the configuration file and the command
// line, handles the "config", "help" and "version" commands, sets the log
// level and finally hands control to serviceMain().
//
// Returns EXIT_SUCCESS unless an exception reaches one of the handlers at the
// bottom of the function, in which case EXIT_FAILURE is returned. Several
// paths below end the process directly through exit() instead of returning.
3ddb9247 3935int main(int argc, char **argv)
288f4aa9 3936{
  // Keep the command line in globals and record the time of start-up.
dbd23fc2
BH
3937 g_argc = argc;
3938 g_argv = argv;
5e3de507 3939 g_stats.startupTime=time(0);
3e135495 3940 versionSetProduct(ProductRecursor);
8a63d3ce 3941 reportBasicTypes();
0007c2e5 3942 reportOtherTypes();
ea634573 3943
22030c37 3944 int ret = EXIT_SUCCESS;
caa6eefa 3945
288f4aa9 3946 try {
    // Declare the known settings: name, help text, and the value the setting
    // holds until the configuration file or the command line changes it.
f888311c 3947 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 3948 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 3949 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 3950 ::arg().set("local-port","port to listen on")="53";
32252594 3951 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 3952 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 3953 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 3954 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 3955 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
d3f809bf 3956 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 3957 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 3958 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 3959 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 3960 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 3961 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
3962 ::arg().set("chroot","switch to chroot jail")="";
3963 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3964 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 3965 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 3966 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 3967 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 3968 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 3969 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 3970 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976
CH
3971 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3972 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
479e0976 3973 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
3974 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3975 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3976 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 3977 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
cc08b5a9 3978 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 3979 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 3980 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
0ec489bf 3981 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 3982 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 3983 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 3984 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
3985 ::arg().set("socket-owner","Owner of socket")="";
3986 ::arg().set("socket-group","Group of socket")="";
3987 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 3988
f0f3f0b0 3989 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
3990 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3991 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 3992 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 3993 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 3994 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 3995 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 3996 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 3997 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 3998 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 3999 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4000 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 4001 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4002 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4003 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4004 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4005 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4006 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4007 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4008 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4009 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4010 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4011 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4012 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4013 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4014 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4015 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4016 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4017 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 4018
5605c067 4019 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4020 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4021 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4022 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4023 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4024 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4025 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4026 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4027 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4028 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4029 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4030 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4031 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3f975863 4032 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4033 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4034 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4035 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4036 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4037 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4038 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4039 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4040 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
a09a8ce0 4041 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 4042 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 4043 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4044 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4045 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4046 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4047 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
a09a8ce0 4048
68e6df3c 4049 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4050 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4051
4052 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4053
d705aad9 4054 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4055 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4056
0735b17e 4057 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4058 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4059
8fd25133
RG
4060 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4061
98d36505
RG
4062 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4063
5cc8371b 4064 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4065 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4066
58da9034 4067 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4068 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4069 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4070 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
    // The new-domain-* settings only exist in builds where NOD_ENABLED is defined.
af1377b7
NC
4071#ifdef NOD_ENABLED
4072 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4073 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4074 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4075 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4076 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4077#endif /* NOD_ENABLED */
2e3d8a19 4078 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4079 ::arg().setCmd("version","Print version string");
d5141417 4080 ::arg().setCmd("config","Output blank configuration");
    // First pass over the command line, made before the configuration file is
    // read: config-dir and config-name are needed to work out which file that is.
e6a9dde5 4081 g_log.toConsole(Logger::Info);
2e3d8a19 4082 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4083
    // <config-dir>/recursor.conf, or <config-dir>/recursor-<config-name>.conf when
    // config-name is set; in that case s_programname gets the same "-<config-name>" suffix.
2d733c0f
CH
4084 string configname=::arg()["config-dir"]+"/recursor.conf";
4085 if(::arg()["config-name"]!="") {
4086 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4087 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4088 }
4089 cleanSlashes(configname);
5124de27 4090
    // Anything reported by getCommands() after the lax parse is treated as a
    // fatal usage error; the process exits with status 99.
5cc1ea1d
CH
4091 if(!::arg().getCommands().empty()) {
4092 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4093 exit(99);
4094 }
4095
    // "config": print the configuration string to stdout and exit successfully.
577cf284
BH
4096 if(::arg().mustDo("config")) {
4097 cout<<::arg().configstring()<<endl;
4098 exit(0);
4099 }
4100
    // A configuration file that cannot be parsed only produces a warning;
    // start-up continues.
3ddb9247 4101 if(!::arg().file(configname.c_str()))
e6a9dde5 4102 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4103
    // Second, non-lax parse of the command line, after the configuration file.
    // NOTE(review): presumably this lets command line values override the file - confirm.
2e3d8a19 4104 ::arg().parse(argc,argv);
c836dc19 4105
    // chroot and api-config-dir may not both be set: log an error and exit with EXIT_FAILURE.
2054afbb
CH
4106 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4107 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4108 exit(EXIT_FAILURE);
4109 }
4110
    // socket-dir left empty: use LOCALSTATEDIR, or "/" when chroot is set.
4111 if (::arg()["socket-dir"].empty()) {
4112 if (::arg()["chroot"].empty())
4113 ::arg().set("socket-dir") = LOCALSTATEDIR;
4114 else
4115 ::arg().set("socket-dir") = "/";
4116 }
4117
2e3d8a19 4118 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4119
    // Keep threads, pdns-distributes-queries and distributor-threads consistent:
    // with exactly one thread the switch is turned off; when the switch is on,
    // distributor-threads is raised to at least 1; when it is off, it is forced to 0.
b243ca3b
RG
4120 if(::arg().asNum("threads")==1) {
4121 if (::arg().mustDo("pdns-distributes-queries")) {
4122 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4123 ::arg().set("pdns-distributes-queries")="no";
4124 }
4125 }
4126
4127 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4128 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4129 ::arg().set("distributor-threads")="1";
4130 }
4131
4132 if (!::arg().mustDo("pdns-distributes-queries")) {
4133 ::arg().set("distributor-threads")="0";
4134 }
61d74169 4135
    // "help" and "version" print their output and exit with status 0. Both are
    // only handled here, after the configuration file has been read.
2e3d8a19 4136 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4137 cout<<"syntax:"<<endl<<endl;
4138 cout<<::arg().helpstring(::arg()["help"])<<endl;
4139 exit(0);
b636533b 4140 }
5e3de507 4141 if(::arg().mustDo("version")) {
ba1a571d 4142 showProductVersion();
3613a51c 4143 showBuildConfiguration();
67076869 4144 exit(0);
5e3de507 4145 }
b636533b 4146
    // Clamp the configured loglevel: never below Logger::Error, and never below
    // Logger::Info while g_quiet is false.
    // NOTE(review): g_quiet is not assigned anywhere in this function - confirm it
    // already reflects the "quiet" setting when this check runs.
34162f8f 4147 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4148
34162f8f
CH
4149 if (logUrgency < Logger::Error)
4150 logUrgency = Logger::Error;
f48d7b65 4151 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4152 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4153 }
e6a9dde5
PL
4154 g_log.setLoglevel(logUrgency);
4155 g_log.toConsole(logUrgency);
34162f8f 4156
    // Hand over to serviceMain(). Nothing it may return is used here: ret only
    // changes in the exception handlers below.
f7c1d4e3 4157 serviceMain(argc, argv);
288f4aa9 4158 }
  // Any exception that reaches main is logged at Error level and turned into EXIT_FAILURE.
3f81d239 4159 catch(PDNSException &ae) {
e6a9dde5 4160 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4161 ret=EXIT_FAILURE;
288f4aa9 4162 }
fdbf35ac 4163 catch(std::exception &e) {
e6a9dde5 4164 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4165 ret=EXIT_FAILURE;
288f4aa9
BH
4166 }
4167 catch(...) {
e6a9dde5 4168 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4169 ret=EXIT_FAILURE;
288f4aa9 4170 }
3ddb9247 4171
22030c37 4172 return ret;
288f4aa9 4173}