]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Add some notes explaining why some validations are not relevant in the dnstap case.
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
d6f3fcfa 91#include "pubsuffix.hh"
af1377b7
NC
92#ifdef NOD_ENABLED
93#include "nod.hh"
94#endif /* NOD_ENABLED */
f3c18728 95
d9d3f9c1 96#include "rec-protobuf.hh"
d705aad9 97#include "rec-snmp.hh"
aa7929a3 98
6b6720de
PL
99#ifdef HAVE_SYSTEMD
100#include <systemd/sd-daemon.h>
101#endif
102
d187038c
RG
103#include "namespaces.hh"
104
d61aa945
RG
105#ifdef HAVE_PROTOBUF
106#include "uuid-utils.hh"
b9fa43e0 107#endif /* HAVE_PROTOBUF */
d61aa945 108
5cc8371b
RG
109#include "xpf.hh"
110
d187038c
RG
111typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
f26bf547 113static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 114static thread_local unsigned int t_id = 0;
f26bf547
RG
115static thread_local std::shared_ptr<Regex> t_traceRegex;
116static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 117#ifdef HAVE_PROTOBUF
3fe06137 118static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 119static thread_local uint64_t t_protobufServersGeneration;
3fe06137 120static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 121static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 122#endif /* HAVE_PROTOBUF */
f26bf547 123
b9fa43e0 124#ifdef HAVE_FSTRM
10ba6d01 125static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
b9fa43e0
OM
126static thread_local uint64_t t_frameStreamServersGeneration;
127#endif /* HAVE_FSTRM */
128
f26bf547
RG
129thread_local std::unique_ptr<MT_t> MT; // the big MTasker
130thread_local std::unique_ptr<MemRecursorCache> t_RC;
131thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 132thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 133thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 134thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 135thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
136#ifdef NOD_ENABLED
137thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 138thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 139#endif /* NOD_ENABLED */
d187038c 140__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 141
b243ca3b
RG
142typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
143
d7dae798 144// for communicating with our threads
b243ca3b
RG
145// effectively readonly after startup
146struct RecThreadInfo
147{
148 struct ThreadPipeSet
149 {
150 int writeToThread{-1};
151 int readToThread{-1};
152 int writeFromThread{-1};
153 int readFromThread{-1};
154 int writeQueriesToThread{-1}; // this one is non-blocking
155 int readQueriesToThread{-1};
156 };
157
adb6cd72 158 /* FD corresponding to TCP sockets this thread is listening
c47f201b 159 on.
adb6cd72
RG
160 These FDs are also in deferredAdds when we have one
161 socket per listener, and in g_deferredAdds instead. */
162 std::set<int> tcpSockets;
b243ca3b
RG
163 /* FD corresponding to listening sockets if we have one socket per
164 listener (with reuseport), otherwise all listeners share the
165 same FD and g_deferredAdds is then used instead */
166 deferredAdd_t deferredAdds;
167 struct ThreadPipeSet pipes;
168 std::thread thread;
144040be
RG
169 MT_t* mt{nullptr};
170 uint64_t numberOfDistributedQueries{0};
b243ca3b
RG
171 /* handle the web server, carbon, statistics and the control channel */
172 bool isHandler{false};
173 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
174 bool isListener{false};
175 /* process queries */
176 bool isWorker{false};
49a699c4 177};
810ff705 178
b243ca3b
RG
179/* first we have the handler thread, t_id == 0 (some other
180 helper threads like SNMP might have t_id == 0 as well)
181 then the distributor threads if any
182 and finally the workers */
183static std::vector<RecThreadInfo> s_threadInfos;
184/* without reuseport, all listeners share the same sockets */
185static deferredAdd_t g_deferredAdds;
faf580f5 186
d187038c
RG
187typedef vector<int> tcpListenSockets_t;
188typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 189
d187038c 190static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 191static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
192static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
193static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
194static AtomicCounter counter;
9065eb05 195static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 196static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 197static NetmaskGroup g_XPFAcl;
d187038c 198static size_t g_tcpMaxQueriesPerConn;
a5886e6a 199static size_t s_maxUDPQueriesPerRound;
d187038c
RG
200static uint64_t g_latencyStatSize;
201static uint32_t g_disthashseed;
202static unsigned int g_maxTCPPerClient;
d187038c 203static unsigned int g_maxMThreads;
b243ca3b 204static unsigned int g_numDistributorThreads;
d187038c
RG
205static unsigned int g_numWorkerThreads;
206static int g_tcpTimeout;
207static uint16_t g_udpTruncationThreshold;
59cb4a79 208static uint16_t g_xpfRRCode{0};
d187038c
RG
209static std::atomic<bool> statsWanted;
210static std::atomic<bool> g_quiet;
211static bool g_logCommonErrors;
212static bool g_anyToTcp;
b243ca3b 213static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 214static bool g_reusePort{false};
00b8cadc 215static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 216static time_t g_statisticsInterval;
9065eb05 217static bool g_useIncomingECS;
c29d820c 218static bool g_useKernelTimestamp;
a6f7f5fe 219std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
220#ifdef NOD_ENABLED
221static bool g_nodEnabled;
222static DNSName g_nodLookupDomain;
223static bool g_nodLog;
224static SuffixMatchNode g_nodDomainWL;
ca2526f5 225static std::string g_nod_pbtag;
41c542ec
NC
226static bool g_udrEnabled;
227static bool g_udrLog;
ca2526f5 228static std::string g_udr_pbtag;
af1377b7 229#endif /* NOD_ENABLED */
f097141b 230#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 231static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
232#else
233static std::set<uint16_t> s_avoidUdpSourcePorts;
234#endif
bf6f28ca
CHB
235static uint16_t s_minUdpSourcePort;
236static uint16_t s_maxUdpSourcePort;
144040be 237static double s_balancingFactor;
49a699c4 238
b243ca3b 239RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 240RecursorStats g_stats;
2d733c0f 241string s_programname="pdns_recursor";
d187038c 242string s_pidfname;
c1c29961 243bool g_lowercaseOutgoing;
bf19ccfd 244unsigned int g_networkTimeoutMsec;
d187038c
RG
245unsigned int g_numThreads;
246uint16_t g_outgoingEDNSBufsize;
98d36505 247bool g_logRPZChanges{false};
c3828c03 248
559b6c93
PL
249// Used in the Syncres to not throttle certain servers
250GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
251GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
252
12cd44ee 253#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 254#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 255// Bad Nets taken from both:
3ddb9247 256// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 257// and
258// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
259// where such a network may not be considered a valid destination
260#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
261#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 262
d7dae798 263//! used to send information to a newborn mthread
ea634573 264struct DNSComboWriter {
08b02366 265 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
266 {
267 }
5cc8371b 268
08b02366 269 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
270 {
271 }
272
5cc8371b
RG
273 void setRemote(const ComboAddress& sa)
274 {
275 d_remote=sa;
276 }
277
278 void setSource(const ComboAddress& sa)
ea634573 279 {
5cc8371b 280 d_source=sa;
ea634573
BH
281 }
282
b71b60ee 283 void setLocal(const ComboAddress& sa)
284 {
285 d_local=sa;
286 }
287
5cc8371b
RG
288 void setDestination(const ComboAddress& sa)
289 {
290 d_destination=sa;
291 }
b71b60ee 292
ea634573
BH
293 void setSocket(int sock)
294 {
295 d_socket=sock;
296 }
a1754c6a
BH
297
298 string getRemote() const
299 {
5cc8371b
RG
300 if (d_source == d_remote) {
301 return d_source.toStringWithPort();
302 }
303 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
304 }
305
5cc8371b 306 MOADNSParser d_mdp;
c9e9e5e0 307 struct timeval d_now;
5cc8371b
RG
308 /* Remote client, might differ from d_source
309 in case of XPF, in which case d_source holds
310 the IP of the client and d_remote of the proxy
311 */
312 ComboAddress d_remote;
313 ComboAddress d_source;
314 /* Destination address, might differ from
315 d_destination in case of XPF, in which case
316 d_destination holds the IP of the proxy and
317 d_local holds our own. */
318 ComboAddress d_local;
319 ComboAddress d_destination;
aa7929a3
RG
320#ifdef HAVE_PROTOBUF
321 boost::uuids::uuid d_uuid;
67e31ebe 322 string d_requestorId;
590388d2 323 string d_deviceId;
c29d820c 324 struct timeval d_kernelTimestamp{0,0};
aa7929a3 325#endif
08b02366 326 std::string d_query;
5164bac3
RG
327 std::vector<std::string> d_policyTags;
328 LuaContext::LuaObject d_data;
b40562da 329 EDNSSubnetOpts d_ednssubnet;
5164bac3 330 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 331 int d_socket;
b673817a 332 unsigned int d_tag{0};
e9f63d47 333 uint32_t d_qhash{0};
70fb28d9 334 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
335 uint16_t d_ecsBegin{0};
336 uint16_t d_ecsEnd{0};
70fb28d9 337 bool d_variable{false};
5164bac3
RG
338 bool d_ecsFound{false};
339 bool d_ecsParsed{false};
340 bool d_tcp;
ea634573
BH
341};
342
06857845
RG
343MT_t* getMT()
344{
345 return MT ? MT.get() : nullptr;
346}
ea634573 347
288f4aa9
BH
348ArgvMap &arg()
349{
350 static ArgvMap theArg;
351 return theArg;
352}
4ef015cd 353
8fb594ba 354unsigned int getRecursorThreadId()
b4015453 355{
30da2030 356 return t_id;
b4015453 357}
09e6702a 358
30ee601a
RG
359int getMTaskerTID()
360{
361 return MT->getTid();
362}
363
b243ca3b
RG
364static bool isDistributorThread()
365{
366 if (t_id == 0) {
367 return false;
368 }
369
370 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
371}
372
373static bool isHandlerThread()
374{
375 if (t_id == 0) {
376 return true;
377 }
378
379 return s_threadInfos.at(t_id).isHandler;
380}
381
d187038c 382static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 383
50c81227 384// -1 is error, 0 is timeout, 1 is success
3ddb9247 385int asendtcp(const string& data, Socket* sock)
5c633640
BH
386{
387 PacketID pident;
388 pident.sock=sock;
389 pident.outMSG=data;
3ddb9247 390
bb4bdbaf 391 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 392 string packet;
5c633640 393
5b0ddd18 394 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 395
9170fbaf 396 if(!ret || ret==-1) { // timeout
bb4bdbaf 397 t_fdm->removeWriteFD(sock->getHandle());
5c633640 398 }
50c81227
BH
399 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
400 return -1;
401 }
9170fbaf 402 return ret;
5c633640
BH
403}
404
d187038c 405static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 406
9170fbaf 407// -1 is error, 0 is timeout, 1 is success
a683e8bd 408int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 409{
50c81227 410 data.clear();
5c633640
BH
411 PacketID pident;
412 pident.sock=sock;
413 pident.inNeeded=len;
825fa717 414 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 415 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 416
bb4bdbaf 417 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 418 if(!ret || ret==-1) { // timeout
bb4bdbaf 419 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 420 }
50c81227
BH
421 else if(data.empty()) {// error, EOF or other
422 return -1;
423 }
424
9170fbaf 425 return ret;
288f4aa9
BH
426}
427
d187038c 428static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 429{
fba1e944 430 PacketID pident=*any_cast<PacketID>(&var);
4465e941 431 char resp[512];
7c77ce63
RG
432 ComboAddress fromaddr;
433 socklen_t addrlen=sizeof(fromaddr);
434
435 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
436 if (fromaddr != pident.remote) {
e6a9dde5 437 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
438
439 }
440
4465e941 441 t_fdm->removeReadFD(fd);
442 if(ret >= 0) {
a683e8bd 443 string data(resp, (size_t) ret);
fba1e944 444 MT->sendEvent(pident, &data);
4465e941 445 }
446 else {
fba1e944 447 string empty;
448 MT->sendEvent(pident, &empty);
449 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 450 }
451}
fba1e944 452string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 453{
4465e941 454 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
455 s.setNonBlocking();
456 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
457
458 s.bind(local);
459 s.connect(dest);
4465e941 460 s.send(query);
461
462 PacketID pident;
463 pident.sock=&s;
7c77ce63 464 pident.remote=dest;
4465e941 465 pident.type=0;
fba1e944 466 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 467
468 string data;
fba1e944 469
4465e941 470 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 471
4465e941 472 if(!ret || ret==-1) { // timeout
4465e941 473 t_fdm->removeReadFD(s.getHandle());
474 }
475 else if(data.empty()) {// error, EOF or other
fba1e944 476 // we could special case this
4465e941 477 return data;
478 }
4465e941 479 return data;
480}
481
d7dae798 482//! pick a random query local address
1652a63e 483ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 484{
1652a63e 485 ComboAddress ret;
5a38281c 486 if(family==AF_INET) {
3ddb9247 487 if(g_localQueryAddresses4.empty())
1652a63e 488 ret = g_local4;
3ddb9247 489 else
1652a63e
BH
490 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
491 ret.sin4.sin_port = htons(port);
5a38281c
BH
492 }
493 else {
494 if(g_localQueryAddresses6.empty())
1652a63e
BH
495 ret = g_local6;
496 else
497 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 498
1652a63e 499 ret.sin6.sin6_port = htons(port);
5a38281c 500 }
1652a63e 501 return ret;
5a38281c 502}
4ef015cd 503
d187038c 504static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 505
d187038c 506static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
507{
508 uint32_t psize=0;
509 socklen_t len=sizeof(psize);
3ddb9247 510
d7dae798 511 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 512 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 513 return;
d7dae798
BH
514 }
515
516 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 517 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
518}
519
520
521static void setSocketReceiveBuffer(int fd, uint32_t size)
522{
523 setSocketBuffer(fd, SO_RCVBUF, size);
524}
525
526static void setSocketSendBuffer(int fd, uint32_t size)
527{
528 setSocketBuffer(fd, SO_SNDBUF, size);
529}
530
531
4ef015cd
BH
532// you can ask this class for a UDP socket to send a query from
533// this socket is not yours, don't even think about deleting it
534// but after you call 'returnSocket' on it, don't assume anything anymore
535class UDPClientSocks
536{
4ef015cd 537 unsigned int d_numsocks;
4ef015cd 538public:
e2642526 539 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
540 {
541 }
542
996c89cc 543 typedef set<int> socks_t;
4ef015cd
BH
544 socks_t d_socks;
545
2ee280cf 546 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 547 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 548 {
d8f6d49f
BH
549 *fd=makeClientSocket(toaddr.sin4.sin_family);
550 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 551 return -2;
d8f6d49f
BH
552
553 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
554 int err = errno;
41ff43f8 555 // returnSocket(*fd);
a7b68ae7
RG
556 try {
557 closesocket(*fd);
558 }
559 catch(const PDNSException& e) {
e6a9dde5 560 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
561 }
562
d8f6d49f 563 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 564 return -2;
998a4334 565 return -1;
d8f6d49f 566 }
998a4334 567
d8f6d49f 568 d_socks.insert(*fd);
998a4334 569 d_numsocks++;
d8f6d49f 570 return 0;
4ef015cd
BH
571 }
572
095c3045
BH
573 void returnSocket(int fd)
574 {
575 socks_t::iterator i=d_socks.find(fd);
34801ab1 576 if(i==d_socks.end()) {
335da0ba 577 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 578 }
bb4bdbaf 579 returnSocketLocked(i);
095c3045
BH
580 }
581
4ef015cd 582 // return a socket to the pool, or simply erase it
bb4bdbaf 583 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 584 {
600fc20b 585 if(i==d_socks.end()) {
3f81d239 586 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 587 }
80baf329 588 try {
bb4bdbaf 589 t_fdm->removeReadFD(*i);
80baf329
BH
590 }
591 catch(FDMultiplexerException& e) {
bb4bdbaf 592 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 593 }
a7b68ae7
RG
594 try {
595 closesocket(*i);
596 }
597 catch(const PDNSException& e) {
e6a9dde5 598 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 599 }
3ddb9247 600
998a4334
BH
601 d_socks.erase(i++);
602 --d_numsocks;
4ef015cd 603 }
d8f6d49f
BH
604
605 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 606 static int makeClientSocket(int family)
d8f6d49f 607 {
a683e8bd 608 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 609
d8f6d49f
BH
610 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
611 return ret;
3ddb9247
PD
612
613 if(ret<0)
335da0ba 614 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 615
7eb73ffa 616 // setCloseOnExec(ret); // we're not going to exec
5a38281c 617
d8f6d49f 618 int tries=10;
3aa91c3e 619 ComboAddress sin;
d8f6d49f 620 while(--tries) {
1652a63e 621 uint16_t port;
3ddb9247 622
d8f6d49f 623 if(tries==1) // fall back to kernel 'random'
4957a608 624 port = 0;
bf6f28ca
CHB
625 else {
626 do {
627 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
628 }
629 while (s_avoidUdpSourcePorts.count(port));
630 }
5a38281c 631
3aa91c3e 632 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 633
3ddb9247 634 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 635 break;
d8f6d49f
BH
636 }
637 if(!tries)
3aa91c3e 638 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 639
29bb743c 640 setReceiveSocketErrors(ret, family);
3897b9e1 641 setNonBlocking(ret);
d8f6d49f
BH
642 return ret;
643 }
49a699c4
BH
644};
645
f26bf547 646static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 647
288f4aa9 648/* these two functions are used by LWRes */
34801ab1 649// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 650int asendto(const char *data, size_t len, int flags,
3ddb9247 651 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 652{
34801ab1
BH
653
654 PacketID pident;
787e5eab
BH
655 pident.domain = domain;
656 pident.remote = toaddr;
657 pident.type = qtype;
34801ab1
BH
658
659 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
660 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
661
662 for(; chain.first != chain.second; chain.first++) {
663 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 664 /*
4665c31e
BH
665 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
666 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 667 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 668 */
34801ab1
BH
669 chain.first->key.chain.insert(id); // we can chain
670 *fd=-1; // gets used in waitEvent / sendEvent later on
671 return 1;
672 }
673 }
674
49a699c4 675 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
676 if(ret < 0)
677 return ret;
34801ab1 678
998a4334
BH
679 pident.fd=*fd;
680 pident.id=id;
3ddb9247 681
bb4bdbaf
BH
682 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
683 ret = send(*fd, data, len, 0);
684
5b0ddd18 685 int tmp = errno;
bb4bdbaf 686
7302ed0a 687 if(ret < 0)
49a699c4 688 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 689
5b0ddd18 690 errno = tmp; // this is for logging purposes only
7302ed0a 691 return ret;
288f4aa9
BH
692}
693
9170fbaf 694// -1 is error, 0 is timeout, 1 is success
f128d20d 695int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 696 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 697{
0d5f0a9f 698 static optional<unsigned int> nearMissLimit;
3ddb9247 699 if(!nearMissLimit)
0d5f0a9f
BH
700 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
701
288f4aa9 702 PacketID pident;
4ef015cd 703 pident.fd=fd;
288f4aa9 704 pident.id=id;
0d5f0a9f 705 pident.domain=domain;
787e5eab 706 pident.type = qtype;
996c89cc 707 pident.remote=fromaddr;
b636533b 708
5b0ddd18 709 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 710
9170fbaf 711 if(ret > 0) {
996c89cc 712 if(packet.empty()) // means "error"
3ddb9247 713 return -1;
998a4334 714
a683e8bd 715 *d_len=packet.size();
f128d20d 716
0d5f0a9f 717 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 718 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 719 g_stats.spoofCount++;
35ce8576
BH
720 return -1;
721 }
288f4aa9 722 }
09e6702a 723 else {
34801ab1 724 if(fd >= 0)
49a699c4 725 t_udpclientsocks->returnSocket(fd);
09e6702a 726 }
9170fbaf 727 return ret;
288f4aa9
BH
728}
729
88def049
BH
730static void writePid(void)
731{
191f2e47 732 if(!::arg().mustDo("write-pid"))
733 return;
18e7758c 734 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 735 if(of)
705f31ae 736 of<< Utility::getpid() <<endl;
88def049 737 else
e6a9dde5 738 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
739}
740
2749c3fe 741TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
742{
743 ++s_currentConnections;
cd989c87 744 (*t_tcpClientCounts)[d_remote]++;
0e408828 745}
cd989c87
BH
746
747TCPConnection::~TCPConnection()
0e408828 748{
a7b68ae7
RG
749 try {
750 if(closesocket(d_fd) < 0)
e6a9dde5 751 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
752 }
753 catch(const PDNSException& e) {
e6a9dde5 754 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
755 }
756
3ddb9247 757 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 758 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 759 --s_currentConnections;
0e408828 760}
0e9d9ce2 761
3ddb9247 762AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
763
764static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 765
92011b8f 766// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 767static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 768{
92011b8f 769 if(packetsize > 1000 && t_largeanswerremotes)
770 t_largeanswerremotes->push_back(remote);
2cc13433
BH
771 switch(res) {
772 case RCode::ServFail:
92011b8f 773 if(t_servfailremotes) {
774 t_servfailremotes->push_back(remote);
5af86fdc 775 if(query && t_servfailqueryring) // packet cache
92011b8f 776 t_servfailqueryring->push_back(make_pair(*query, qtype));
777 }
2cc13433
BH
778 g_stats.servFails++;
779 break;
780 case RCode::NXDomain:
781 g_stats.nxDomains++;
782 break;
783 case RCode::NoError:
784 g_stats.noErrors++;
785 break;
786 }
787}
788
9a864da4 789static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 790try
791{
5cc8371b 792 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 793}
794catch(...)
795{
796 return "Exception making error message for exception";
797}
798
aa7929a3 799#ifdef HAVE_PROTOBUF
b773359c 800static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 801{
b773359c
RG
802 if (!t_protobufServers) {
803 return;
804 }
805
e1c8a4bb
RG
806 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
807 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
808 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 809 message.setServerIdentity(SyncRes::s_serverID);
a94bc5d7 810 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 811 message.setRequestorId(requestorId);
590388d2 812 message.setDeviceId(deviceId);
02b47f43 813
02b47f43 814 if (!policyTags.empty()) {
d9d3f9c1 815 message.setPolicyTags(policyTags);
02b47f43 816 }
aa7929a3 817
d9d3f9c1 818// cerr <<message.toDebugString()<<endl;
aa7929a3 819 std::string str;
d9d3f9c1 820 message.serialize(str);
b773359c
RG
821
822 for (auto& server : *t_protobufServers) {
823 server->queueData(str);
824 }
aa7929a3
RG
825}
826
b773359c 827static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 828{
b773359c
RG
829 if (!t_protobufServers) {
830 return;
831 }
832
d9d3f9c1 833// cerr <<message.toDebugString()<<endl;
aa7929a3 834 std::string str;
d9d3f9c1 835 message.serialize(str);
b773359c
RG
836
837 for (auto& server : *t_protobufServers) {
838 server->queueData(str);
839 }
aa7929a3
RG
840}
841#endif
842
53508135
PL
843/**
844 * Chases the CNAME provided by the PolicyCustom RPZ policy.
845 *
846 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
847 * @param qtype: The QType of the original query
848 * @param sr: A SyncRes
849 * @param res: An integer that will contain the RCODE of the lookup we do
850 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
851 */
d187038c 852static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
853{
854 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
855 bool oldWantsRPZ = sr.getWantsRPZ();
856 sr.setWantsRPZ(false);
53508135 857 vector<DNSRecord> ans;
6da513b2 858 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
859 for (const auto& rec : ans) {
860 if(rec.d_place == DNSResourceRecord::ANSWER) {
861 ret.push_back(rec);
862 }
863 }
864 // Reset the RPZ state of the SyncRes
30ee601a 865 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
866 }
867}
868
70fb28d9 869static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 870{
70fb28d9 871 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
872
873 if(rec.d_type != QType::OPT) // their TTL ain't real
874 minTTL = min(minTTL, rec.d_ttl);
875
876 rec.d_content->toPacket(pw);
877 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
878 pw.rollback();
879 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
880 pw.getHeader()->tc=1;
881 pw.truncate();
882 }
883 return false;
884 }
885
886 return true;
887}
888
63341e8d 889#ifdef HAVE_PROTOBUF
3fe06137 890static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 891{
3fe06137 892 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
893
894 for (const auto& server : config.servers) {
895 try {
da71b63b 896 result->emplace_back(new RemoteLogger(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
b773359c
RG
897 }
898 catch(const std::exception& e) {
899 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
900 }
901 catch(const PDNSException& e) {
902 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
903 }
63341e8d
RG
904 }
905
906 return result;
907}
908
909static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
910{
911 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
912 if (t_protobufServers) {
913 for (auto& server : *t_protobufServers) {
914 server->stop();
915 }
916 t_protobufServers.reset();
63341e8d
RG
917 }
918
919 return false;
920 }
921
922 /* if the server was not running, or if it was running according to a
923 previous configuration */
b773359c
RG
924 if (!t_protobufServers ||
925 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 926
b773359c
RG
927 if (t_protobufServers) {
928 for (auto& server : *t_protobufServers) {
929 server->stop();
930 }
63341e8d 931 }
b773359c 932 t_protobufServers.reset();
63341e8d 933
b773359c
RG
934 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
935 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
936 }
937
938 return true;
939}
940
941static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
942{
943 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
944 if (t_outgoingProtobufServers) {
945 for (auto& server : *t_outgoingProtobufServers) {
946 server->stop();
947 }
63341e8d 948 }
b773359c 949 t_outgoingProtobufServers.reset();
63341e8d
RG
950
951 return false;
952 }
953
954 /* if the server was not running, or if it was running according to a
955 previous configuration */
b773359c
RG
956 if (!t_outgoingProtobufServers ||
957 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 958
b773359c
RG
959 if (t_outgoingProtobufServers) {
960 for (auto& server : *t_outgoingProtobufServers) {
961 server->stop();
962 }
63341e8d 963 }
b773359c 964 t_outgoingProtobufServers.reset();
63341e8d 965
b773359c
RG
966 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
967 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
968 }
969
970 return true;
971}
b9fa43e0
OM
972
973#ifdef HAVE_FSTRM
974
10ba6d01 975static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
b9fa43e0 976{
10ba6d01 977 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
b9fa43e0
OM
978
979 for (const auto& server : config.servers) {
980 try {
573f4ff0
OM
981 std::unordered_map<string,unsigned> options;
982 options["bufferHint"] = config.bufferHint;
983 options["flushTimeout"] = config.flushTimeout;
984 options["inputQueueSize"] = config.inputQueueSize;
985 options["outputQueueSize"] = config.outputQueueSize;
986 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
987 options["reopenInterval"] = config.reopenInterval;
988 auto fsl = new FrameStreamLogger(server.sin4.sin_family, server.toStringWithPort(), true, options);
989 fsl->setLogQueries(config.logQueries);
990 fsl->setLogResponses(config.logResponses);
991 result->emplace_back(fsl);
b9fa43e0
OM
992 }
993 catch(const std::exception& e) {
994 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
995 }
996 catch(const PDNSException& e) {
997 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
998 }
999 }
1000
1001 return result;
1002}
1003
1004static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1005{
1006 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1007 if (t_frameStreamServers) {
1008 // dt's take care of cleanup
1009 t_frameStreamServers.reset();
1010 }
1011
1012 return false;
1013 }
1014
1015 /* if the server was not running, or if it was running according to a
1016 previous configuration */
1017 if (!t_frameStreamServers ||
1018 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1019
1020 if (t_frameStreamServers) {
1021 // dt's take care of cleanup
1022 t_frameStreamServers.reset();
1023 }
1024
1025 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1026 t_frameStreamServersGeneration = luaconfsLocal->generation;
1027 }
1028
1029 return true;
1030}
1031#endif /* HAVE_FSTRM */
63341e8d
RG
1032#endif /* HAVE_PROTOBUF */
1033
af1377b7 1034#ifdef NOD_ENABLED
41c542ec 1035static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
1036{
1037 static const QType qt(QType::A);
1038 static const uint16_t qc(QClass::IN);
41c542ec 1039 bool ret = false;
af1377b7
NC
1040 // First check the (sub)domain isn't whitelisted for NOD purposes
1041 if (!g_nodDomainWL.check(dname)) {
1042 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
1043 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1044 if (g_nodLog) {
1045 // This should probably log to a dedicated log file
1046 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
1047 }
1048 if (!(g_nodLookupDomain.isRoot())) {
1049 // Send a DNS A query to <domain>.g_nodLookupDomain
1050 DNSName qname = dname;
1051 vector<DNSRecord> dummy;
1052 qname += g_nodLookupDomain;
1053 directResolve(qname, qt, qc, dummy);
1054 }
41c542ec 1055 ret = true;
af1377b7
NC
1056 }
1057 }
41c542ec 1058 return ret;
af1377b7
NC
1059}
1060
1061static void nodAddDomain(const DNSName& dname)
1062{
1063 // Don't bother adding domains on the nod whitelist
1064 if (!g_nodDomainWL.check(dname)) {
1065 if (t_nodDBp) {
1066 // This keeps the nod info up to date
1067 t_nodDBp->addDomain(dname);
1068 }
1069 }
1070}
41c542ec
NC
1071
1072static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1073{
1074 bool ret = false;
1075 if (record.d_place == DNSResourceRecord::ANSWER ||
1076 record.d_place == DNSResourceRecord::ADDITIONAL) {
1077 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1078 std::stringstream ss;
1079 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1080 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1081 if (g_udrLog) {
1082 // This should also probably log to a dedicated file.
1083 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1084 }
1085 ret = true;
1086 }
1087 }
1088 return ret;
1089}
af1377b7
NC
1090#endif /* NOD_ENABLED */
1091
d187038c 1092static void startDoResolve(void *p)
288f4aa9 1093{
9a864da4 1094 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1095 try {
5af86fdc
RG
1096 if (t_queryring)
1097 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1098
32015748 1099 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1100 EDNSOpts edo;
5164bac3 1101 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1102 bool variableAnswer = dc->d_variable;
8e079f3a 1103 bool haveEDNS=false;
ca2526f5
NC
1104#ifdef NOD_ENABLED
1105 bool hasUDR = false;
1106#endif /* NOD_ENABLED */
f1db0de2
PL
1107 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1108 uint8_t ednsExtRCode = 0;
8e079f3a 1109 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1110 haveEDNS=true;
1111 if (edo.d_version != 0) {
1112 ednsExtRCode = ERCode::BADVERS;
1113 }
1114
32015748
RG
1115 if(!dc->d_tcp) {
1116 /* rfc6891 6.2.3:
1117 "Values lower than 512 MUST be treated as equal to 512."
1118 */
1119 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1120 }
5164bac3 1121 ednsOpts = edo.d_options;
3af35968 1122 maxanswersize -= 11; // EDNS header size
b40562da 1123
1f691b94
PL
1124 for (const auto& o : edo.d_options) {
1125 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1126 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1127 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1128 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1129 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1130 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1131 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1132 variableAnswer = true; // Can't packetcache an answer with NSID
1133 // Option Code and Option Length are both 2
1134 maxanswersize -= 2 + 2 + mode_server_id.size();
1135 }
b40562da
RG
1136 }
1137 }
10321a98 1138 }
b40562da
RG
1139 /* perhaps there was no EDNS or no ECS but by now we looked */
1140 dc->d_ecsParsed = true;
e325f20c 1141 vector<DNSRecord> ret;
ea634573 1142 vector<uint8_t> packet;
b23b8614 1143
ad42489c 1144 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1145 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1146 bool wantsRPZ(true);
1fbc6dc5 1147 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1148 bool logResponse = false;
aa7929a3 1149#ifdef HAVE_PROTOBUF
63341e8d 1150 if (checkProtobufExport(luaconfsLocal)) {
b773359c 1151 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1152 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1153 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
0bd2e252 1154 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1155 pbMessage->setServerIdentity(SyncRes::s_serverID);
d362f7c1 1156 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1157 }
1158#endif /* HAVE_PROTOBUF */
ad42489c 1159
b9fa43e0
OM
1160#ifdef HAVE_FSTRM
1161 checkFrameStreamExport(luaconfsLocal);
1162#endif
1163
3ddb9247 1164 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1165
1166 pw.getHeader()->aa=0;
1167 pw.getHeader()->ra=1;
c154c8a4 1168 pw.getHeader()->qr=1;
bb4bdbaf 1169 pw.getHeader()->tc=0;
ea634573 1170 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1171 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1172 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1173
70fb28d9
RG
1174 /* This is the lowest TTL seen in the records of the response,
1175 so we can't cache it for longer than this value.
1176 If we have a TTL cap, this value can't be larger than the
1177 cap no matter what. */
1178 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1179
1180 SyncRes sr(dc->d_now);
0c43f455 1181
2e921ec6 1182 bool DNSSECOK=false;
3457a2a0 1183 if(t_pdl) {
f26bf547 1184 sr.setLuaEngine(t_pdl);
3457a2a0 1185 }
9eec8c98 1186 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1187 sr.setDoDNSSEC(true);
9eec8c98
PL
1188
1189 // Does the requestor want DNSSEC records?
d6c335ab 1190 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1191 DNSSECOK=true;
1192 g_stats.dnssecQueries++;
1193 }
88c33dca
RG
1194 if (dc->d_mdp.d_header.cd) {
1195 /* Per rfc6840 section 5.9, "When processing a request with
1196 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1197 to return all response data, even data that has failed DNSSEC
1198 validation. */
1199 ++g_stats.dnssecCheckDisabledQueries;
1200 }
1201 if (dc->d_mdp.d_header.ad) {
1202 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1203 indicating that the requester understands and is interested in the
1204 value of the AD bit in the response. This allows a requester to
1205 indicate that it understands the AD bit without also requesting
1206 DNSSEC data via the DO bit. */
1207 ++g_stats.dnssecAuthenticDataQueries;
1208 }
9eec8c98
PL
1209 } else {
1210 // Ignore the client-set CD flag
1211 pw.getHeader()->cd=0;
5b9853c9 1212 }
0c43f455
RG
1213 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1214
4898a348 1215#ifdef HAVE_PROTOBUF
30ee601a 1216 sr.setInitialRequestId(dc->d_uuid);
b773359c 1217 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1218#endif
b9fa43e0
OM
1219#ifdef HAVE_FSTRM
1220 sr.setFrameStreamServers(t_frameStreamServers);
1221#endif
2fe3354d 1222 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1223
904d3219 1224 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1225 bool shouldNotValidate = false;
904d3219 1226
ef3b6cd7
RG
1227 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1228 int res = RCode::NoError;
1f1ca368 1229 DNSFilterEngine::Policy appliedPolicy;
6da513b2 1230 std::vector<DNSRecord> spoofed;
f1c7929a 1231 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1232 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1233 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1234 dq.tag = dc->d_tag;
1235 dq.discardedPolicies = &sr.d_discardedPolicies;
1236 dq.policyTags = &dc->d_policyTags;
1237 dq.appliedPolicy = &appliedPolicy;
1238 dq.currentRecords = &ret;
1239 dq.dh = &dc->d_mdp.d_header;
05c74122 1240 dq.data = dc->d_data;
67e31ebe
RG
1241#ifdef HAVE_PROTOBUF
1242 dq.requestorId = dc->d_requestorId;
590388d2 1243 dq.deviceId = dc->d_deviceId;
67e31ebe 1244#endif
ba21fcfe 1245
6cf96227
PL
1246 if(ednsExtRCode != 0) {
1247 goto sendit;
1248 }
1249
e661a20b 1250 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1251 pw.getHeader()->tc = 1;
1252 res = 0;
1253 variableAnswer = true;
e661a20b
PD
1254 goto sendit;
1255 }
1256
f26bf547 1257 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1258 sr.setLogMode(SyncRes::Store);
1259 tracedQuery=true;
1260 }
3ddb9247 1261
8f7473d7 1262
976ec823 1263 if(!g_quiet || tracedQuery) {
e6a9dde5 1264 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1265 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1266 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1267 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1268 }
e6a9dde5 1269 g_log<<endl;
976ec823 1270 }
c75a6a9e 1271
fededf47 1272 sr.setId(MT->getTid());
67828389 1273 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1274 sr.setCacheOnly();
1275
f26bf547
RG
1276 if (t_pdl) {
1277 t_pdl->prerpz(dq, res);
0a273054
RG
1278 }
1279
db486de5 1280 // Check if the query has a policy attached to it
0a273054 1281 if (wantsRPZ) {
5cc8371b 1282 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1283 }
644dd1da 1284
54be222b 1285 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1286 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1287
30ee601a 1288 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1289 if(wantsRPZ) {
1290 switch(appliedPolicy.d_kind) {
1291 case DNSFilterEngine::PolicyKind::NoAction:
1292 break;
1293 case DNSFilterEngine::PolicyKind::Drop:
1294 g_stats.policyDrops++;
7a25883a 1295 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1296 return;
1297 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1298 g_stats.policyResults[appliedPolicy.d_kind]++;
1299 res=RCode::NXDomain;
1300 goto haveAnswer;
1301 case DNSFilterEngine::PolicyKind::NODATA:
1302 g_stats.policyResults[appliedPolicy.d_kind]++;
1303 res=RCode::NoError;
db486de5 1304 goto haveAnswer;
b8470add
PL
1305 case DNSFilterEngine::PolicyKind::Custom:
1306 g_stats.policyResults[appliedPolicy.d_kind]++;
1307 res=RCode::NoError;
6da513b2
RG
1308 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1309 for (const auto& dr : spoofed) {
1310 ret.push_back(dr);
1311 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1312 }
b8470add
PL
1313 goto haveAnswer;
1314 case DNSFilterEngine::PolicyKind::Truncate:
1315 if(!dc->d_tcp) {
1316 g_stats.policyResults[appliedPolicy.d_kind]++;
1317 res=RCode::NoError;
1318 pw.getHeader()->tc=1;
1319 goto haveAnswer;
1320 }
1321 break;
1322 }
db486de5
PL
1323 }
1324
b8470add 1325 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1326 try {
1327 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1328 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1329 }
1330 catch(ImmediateServFailException &e) {
854d44e3 1331 if(g_logCommonErrors)
e6a9dde5 1332 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1333 res = RCode::ServFail;
1334 }
4485aa35 1335
1921a4c2
RG
1336 dq.validationState = sr.getValidationState();
1337
b8470add
PL
1338 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1339 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1340 appliedPolicy = sr.d_appliedPolicy;
1341 g_stats.policyResults[appliedPolicy.d_kind]++;
1342 switch(appliedPolicy.d_kind) {
1343 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1344 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1345 case DNSFilterEngine::PolicyKind::Drop:
1346 g_stats.policyDrops++;
b8470add
PL
1347 return;
1348 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1349 ret.clear();
1350 res=RCode::NXDomain;
1351 goto haveAnswer;
1352
1353 case DNSFilterEngine::PolicyKind::NODATA:
1354 ret.clear();
1355 res=RCode::NoError;
1356 goto haveAnswer;
1357
1358 case DNSFilterEngine::PolicyKind::Truncate:
1359 if(!dc->d_tcp) {
1360 ret.clear();
1361 res=RCode::NoError;
1362 pw.getHeader()->tc=1;
1363 goto haveAnswer;
1364 }
1365 break;
1366
1367 case DNSFilterEngine::PolicyKind::Custom:
1368 ret.clear();
1369 res=RCode::NoError;
6da513b2
RG
1370 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1371 for (const auto& dr : spoofed) {
1372 ret.push_back(dr);
1373 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1374 }
b8470add
PL
1375 goto haveAnswer;
1376 }
1377 }
1378
1379 if (wantsRPZ) {
1f1ca368 1380 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1381 }
db486de5 1382
f26bf547 1383 if(t_pdl) {
db486de5
PL
1384 if(res == RCode::NoError) {
1385 auto i=ret.cbegin();
1386 for(; i!= ret.cend(); ++i)
1387 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1388 break;
f26bf547 1389 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1390 shouldNotValidate = true;
1391
db486de5 1392 }
f26bf547 1393 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1394 shouldNotValidate = true;
db486de5 1395
f26bf547 1396 if(t_pdl->postresolve(dq, res))
3ca4e735 1397 shouldNotValidate = true;
db486de5
PL
1398 }
1399
b8470add
PL
1400 if (wantsRPZ) { //XXX This block is repeated, see above
1401 g_stats.policyResults[appliedPolicy.d_kind]++;
1402 switch(appliedPolicy.d_kind) {
1403 case DNSFilterEngine::PolicyKind::NoAction:
1404 break;
1405 case DNSFilterEngine::PolicyKind::Drop:
1406 g_stats.policyDrops++;
b8470add
PL
1407 return;
1408 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1409 ret.clear();
1410 res=RCode::NXDomain;
1411 goto haveAnswer;
1412
1413 case DNSFilterEngine::PolicyKind::NODATA:
1414 ret.clear();
1415 res=RCode::NoError;
1416 goto haveAnswer;
1417
1418 case DNSFilterEngine::PolicyKind::Truncate:
1419 if(!dc->d_tcp) {
1420 ret.clear();
1421 res=RCode::NoError;
1422 pw.getHeader()->tc=1;
1423 goto haveAnswer;
1424 }
1425 break;
1426
1427 case DNSFilterEngine::PolicyKind::Custom:
1428 ret.clear();
1429 res=RCode::NoError;
6da513b2
RG
1430 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1431 for (const auto& dr : spoofed) {
1432 ret.push_back(dr);
1433 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1434 }
b8470add
PL
1435 goto haveAnswer;
1436 }
644dd1da 1437 }
4485aa35 1438 }
644dd1da 1439 haveAnswer:;
3e8216c8 1440 if(res == PolicyDecision::DROP) {
e9c2ad3a 1441 g_stats.policyDrops++;
ae7e77ad 1442 return;
3ddb9247 1443 }
9cdfab64 1444 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1445 {
85ffbc53
PD
1446 string trace(sr.getTrace());
1447 if(!trace.empty()) {
1448 vector<string> lines;
1449 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1450 for(const string& line : lines) {
85ffbc53 1451 if(!line.empty())
e6a9dde5 1452 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1453 }
1454 }
1455 }
3ddb9247 1456
9cdfab64 1457 if(res == -1) {
0fe1d080
PD
1458 pw.getHeader()->rcode=RCode::ServFail;
1459 // no commit here, because no record
1460 g_stats.servFails++;
1461 }
288f4aa9 1462 else {
ea634573 1463 pw.getHeader()->rcode=res;
92011b8f 1464
f3fe4ae6 1465 // Does the validation mode or query demand validation?
0c43f455 1466 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1467 try {
f3fe4ae6 1468 if(sr.doLog()) {
e6a9dde5 1469 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1470 }
4d2be65d
RG
1471
1472 auto state = sr.getValidationState();
1473
b25cae9a 1474 if(state == Secure) {
2e921ec6 1475 if(sr.doLog()) {
e6a9dde5 1476 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1477 }
b25cae9a 1478
1479 // Is the query source interested in the value of the ad-bit?
885c8881 1480 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1481 pw.getHeader()->ad=1;
1482 }
1483 else if(state == Insecure) {
f3fe4ae6 1484 if(sr.doLog()) {
e6a9dde5 1485 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1486 }
b25cae9a 1487
1488 pw.getHeader()->ad=0;
f3fe4ae6 1489 }
b25cae9a 1490 else if(state == Bogus) {
66f2e6ad
KM
1491 if(t_bogusremotes)
1492 t_bogusremotes->push_back(dc->d_source);
1493 if(t_bogusqueryring)
1494 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1495 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1496 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1497 }
1498
1499 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1500 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1501 if(sr.doLog()) {
e6a9dde5 1502 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1503 }
1504
1505 pw.getHeader()->rcode=RCode::ServFail;
1506 goto sendit;
1507 } else {
1508 if(sr.doLog()) {
e6a9dde5 1509 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1510 }
1511 }
1512 }
1513 }
1514 catch(ImmediateServFailException &e) {
1515 if(g_logCommonErrors)
e6a9dde5 1516 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1517 pw.getHeader()->rcode=RCode::ServFail;
1518 goto sendit;
f3fe4ae6 1519 }
b3f0ed10 1520 }
1521
c154c8a4 1522 if(ret.size()) {
92476c8b 1523 orderAndShuffle(ret);
5cc8371b 1524 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1525 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1526 variableAnswer=true;
1527 }
8e079f3a 1528 }
0afa32d4
RG
1529
1530 bool needCommit = false;
8e079f3a 1531 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1532 if( ! DNSSECOK &&
1533 ( i->d_type == QType::NSEC3 ||
1534 (
1535 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1536 (
1537 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1538 i->d_place != DNSResourceRecord::ANSWER
1539 )
1540 )
1541 )
1542 ) {
2e921ec6 1543 continue;
3e80ebce
KM
1544 }
1545
70fb28d9 1546 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1547 needCommit = false;
1548 break;
1549 }
1550 needCommit = true;
1551
41c542ec
NC
1552#ifdef NOD_ENABLED
1553 bool udr = false;
1554 if (g_udrEnabled) {
1555 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1556 if (!hasUDR && udr)
1557 hasUDR = true;
41c542ec
NC
1558 }
1559#endif /* NOD ENABLED */
1560
aa7929a3 1561#ifdef HAVE_PROTOBUF
b773359c 1562 if (t_protobufServers) {
41c542ec
NC
1563#ifdef NOD_ENABLED
1564 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1565#else
0bd2e252 1566 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1567#endif /* NOD_ENABLED */
aa7929a3
RG
1568 }
1569#endif
ea634573 1570 }
0afa32d4 1571 if(needCommit)
8e079f3a 1572 pw.commit();
288f4aa9 1573 }
10321a98 1574 sendit:;
b3f0ed10 1575
a0ddd130 1576 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
9837850d 1577 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
5a7f99b4 1578 EDNSSubnetOpts eo;
1579 eo.source = dc->d_ednssubnet.source;
1580 ComboAddress sa;
1ef18cab 1581 sa.reset();
5a7f99b4 1582 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1583 eo.scope = Netmask(sa, 0);
1584
1585 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1586 }
1587
97c6d7e5
RG
1588 if (haveEDNS) {
1589 /* we try to add the EDNS OPT RR even for truncated answers,
1590 as rfc6891 states:
1591 "The minimal response MUST be the DNS header, question section, and an
1592 OPT record. This MUST also occur when a truncated response (using
1593 the DNS header's TC bit) is returned."
1594 */
9b60fb71 1595 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1596 pw.commit();
97c6d7e5
RG
1597 }
1598
79332bff 1599 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1600 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1601#ifdef NOD_ENABLED
1602 bool nod = false;
1603 if (g_nodEnabled) {
1604 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1605 nod = true;
1606 }
1607#endif /* NOD_ENABLED */
aa7929a3 1608#ifdef HAVE_PROTOBUF
b773359c 1609 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1610 pbMessage->setBytes(packet.size());
1611 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1612 if (appliedPolicy.d_name) {
d362f7c1
RG
1613 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1614 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1615 }
d362f7c1 1616 pbMessage->setPolicyTags(dc->d_policyTags);
c29d820c
RG
1617 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1618 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1619 }
1620 else {
1621 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1622 }
d362f7c1
RG
1623 pbMessage->setRequestorId(dq.requestorId);
1624 pbMessage->setDeviceId(dq.deviceId);
41c542ec
NC
1625#ifdef NOD_ENABLED
1626 if (g_nodEnabled) {
ca2526f5 1627 if (nod) {
41c542ec 1628 pbMessage->setNOD(true);
ca2526f5
NC
1629 pbMessage->addPolicyTag(g_nod_pbtag);
1630 }
1631 if (hasUDR) {
1632 pbMessage->addPolicyTag(g_udr_pbtag);
1633 }
41c542ec
NC
1634 }
1635#endif /* NOD_ENABLED */
b773359c 1636 protobufLogResponse(*pbMessage);
ac238ea7 1637#ifdef NOD_ENABLED
ca2526f5
NC
1638 if (g_nodEnabled) {
1639 pbMessage->setNOD(false);
1640 pbMessage->clearUDR();
1641 if (nod)
1642 pbMessage->removePolicyTag(g_nod_pbtag);
1643 if (hasUDR)
1644 pbMessage->removePolicyTag(g_udr_pbtag);
1645 }
ac238ea7 1646#endif /* NOD_ENABLED */
aa7929a3
RG
1647 }
1648#endif
ea634573 1649 if(!dc->d_tcp) {
b71b60ee 1650 struct msghdr msgh;
1651 struct iovec iov;
1652 char cbuf[256];
1653 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1654 msgh.msg_control=NULL;
1655
cbc03320 1656 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1657 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1658 }
cbc03320 1659 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1660 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1661
49dc532e 1662 if(variableAnswer || sr.wasVariable()) {
1ef18cab 1663 g_stats.variableResponses++;
49dc532e 1664 }
3762e821 1665 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1666 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1667 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1668 g_now.tv_sec,
76e2b9e3 1669 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1670 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1671 dq.validationState,
08b02366
RG
1672 dc->d_ecsBegin,
1673 dc->d_ecsEnd,
4b0bdd5f 1674 std::move(pbMessage));
1051f8a9 1675 }
3762e821 1676 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1677 }
9c495589
BH
1678 else {
1679 char buf[2];
ea634573
BH
1680 buf[0]=packet.size()/256;
1681 buf[1]=packet.size()%256;
feccc9fc 1682
c038218b 1683 Utility::iovec iov[2];
feccc9fc 1684
ea634573
BH
1685 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1686 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1687
dd079764 1688 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1689 bool hadError=true;
feccc9fc 1690
dd079764 1691 if(wret == 0)
e6a9dde5 1692 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1693 else if(wret < 0 )
e6a9dde5 1694 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1695 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1696 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1697 else
18af64a8 1698 hadError=false;
3ddb9247 1699
09e6702a 1700 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1701
09e6702a 1702 if(hadError) {
18af64a8 1703 // no need to remove us from FDM, we weren't there
c36bc97a 1704 dc->d_socket = -1;
09e6702a 1705 }
a6ae6414 1706 else {
fde296a3
RG
1707 dc->d_tcpConnection->queriesCount++;
1708 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1709 dc->d_socket = -1;
1710 }
1711 else {
1712 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1713 Utility::gettimeofday(&g_now, 0); // needs to be updated
27ae2e3c
RG
1714 struct timeval ttd = g_now;
1715 ttd.tv_sec += g_tcpTimeout;
1716
1717 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
fde296a3 1718 }
0e9d9ce2 1719 }
9c495589 1720 }
2c9119cd 1721 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1722 if(!g_quiet) {
e6a9dde5
PL
1723 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1724 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1725 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1726 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1727
1728 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1729 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1730 }
1731
e6a9dde5 1732 g_log<<endl;
2c9119cd 1733
c75a6a9e 1734 }
b23b8614 1735
f7b8cffa
RG
1736 if (sr.d_outqueries || sr.d_authzonequeries) {
1737 t_RC->cacheMisses++;
1738 }
1739 else {
1740 t_RC->cacheHits++;
1741 }
2c9119cd 1742
fe213470
BH
1743 if(spent < 0.001)
1744 g_stats.answers0_1++;
1745 else if(spent < 0.010)
1746 g_stats.answers1_10++;
1747 else if(spent < 0.1)
1748 g_stats.answers10_100++;
1749 else if(spent < 1.0)
1750 g_stats.answers100_1000++;
1751 else
1752 g_stats.answersSlow++;
1753
574af7ea 1754 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1755 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1756 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1757 // no worries, we do this for packet cache hits elsewhere
19178da9 1758
1759 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1760 if(ourtime < 1)
1761 g_stats.ourtime0_1++;
1762 else if(ourtime < 2)
1763 g_stats.ourtime1_2++;
1764 else if(ourtime < 4)
1765 g_stats.ourtime2_4++;
1766 else if(ourtime < 8)
1767 g_stats.ourtime4_8++;
1768 else if(ourtime < 16)
1769 g_stats.ourtime8_16++;
1770 else if(ourtime < 32)
1771 g_stats.ourtime16_32++;
1772 else {
1773 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1774 g_stats.ourtimeSlow++;
1775 }
042da1a1 1776 if(ourtime >= 0.0) {
1777 newLat=ourtime*1000; // usec
1778 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1779 }
c6d04bdc 1780 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1781 }
3f81d239 1782 catch(PDNSException &ae) {
e6a9dde5 1783 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1784 }
16ce7f18
JS
1785 catch(const MOADNSException &mde) {
1786 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1787 }
fdbf35ac 1788 catch(std::exception& e) {
e6a9dde5 1789 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1790
1791 // Luawrapper nests the exception from Lua, so we unnest it here
1792 try {
1793 std::rethrow_if_nested(e);
2010ac95 1794 } catch(const std::exception& ne) {
e6a9dde5 1795 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1796 } catch(...) {}
1797
e6a9dde5 1798 g_log<<endl;
c154c8a4 1799 }
288f4aa9 1800 catch(...) {
e6a9dde5 1801 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1802 }
3ddb9247 1803
ec6eacbc 1804 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1805}
1806
d187038c 1807static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1808{
2d733c0f 1809 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1810 if(processNum >= 0)
335da0ba 1811 sockname += "."+std::to_string(processNum);
677e2a46 1812 sockname+=".controlsocket";
41f7a068 1813 s_rcc.listen(sockname);
3ddb9247 1814
387de317
BH
1815 int sockowner = -1;
1816 int sockgroup = -1;
1817
1818 if (!::arg().isEmpty("socket-group"))
1819 sockgroup=::arg().asGid("socket-group");
1820 if (!::arg().isEmpty("socket-owner"))
1821 sockowner=::arg().asUid("socket-owner");
3ddb9247 1822
f838ad8d
BH
1823 if (sockgroup > -1 || sockowner > -1) {
1824 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1825 unixDie("Failed to chown control socket");
1826 }
1827 }
387de317
BH
1828
1829 // do mode change if socket-mode is given
1830 if(!::arg().isEmpty("socket-mode")) {
1831 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1832 if(chmod(sockname.c_str(), sockmode) < 0) {
1833 unixDie("Failed to chmod control socket");
1834 }
387de317 1835 }
1d5b3ce6
BH
1836}
1837
5cc8371b 1838static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1839 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1840 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1841{
59cb4a79 1842 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1843 const bool lookForECS = ednssubnet != nullptr;
1844 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1845 size_t questionLen = question.length();
1846 unsigned int consumed=0;
1847 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1848
1849 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1850 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1851 const uint16_t arcount = ntohs(dh->arcount);
1852
1853 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1854 if (question.at(pos) != 0) {
1855 /* not an OPT or a XPF, bye. */
1856 return;
1857 }
1858
1859 pos += 1;
1860 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1861 pos += sizeof(dnsrecordheader);
1862
1863 if (pos >= questionLen) {
1864 return;
1865 }
1866
02b47f43 1867 /* OPT root label (1) followed by type (2) */
5cc8371b 1868 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1869 if (!options) {
1870 char* ecsStart = nullptr;
1871 size_t ecsLen = 0;
5cc8371b
RG
1872 /* we need to pass the record len */
1873 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1874 if (res == 0 && ecsLen > 4) {
1875 EDNSSubnetOpts eso;
1876 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1877 *ednssubnet=eso;
5cc8371b 1878 foundECS = true;
00b8cadc
RG
1879 }
1880 }
1881 }
1882 else {
5cc8371b
RG
1883 /* we need to pass the record len */
1884 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1885 if (res == 0) {
1886 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1887 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1888 EDNSSubnetOpts eso;
29e6303a 1889 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1890 *ednssubnet=eso;
5cc8371b 1891 foundECS = true;
00b8cadc
RG
1892 }
1893 }
02b47f43
RG
1894 }
1895 }
1896 }
59cb4a79 1897 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1898 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1899 return;
1900 }
1901
1902 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1903 }
1904
1905 pos += ntohs(drh->d_clen);
02b47f43
RG
1906 }
1907}
1908
d187038c 1909static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1910{
cd989c87 1911 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1912
879b3f70 1913 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 1914 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1915 if(bytes==1)
667f7e60 1916 conn->state=TCPConnection::BYTE1;
3ddb9247 1917 if(bytes==2) {
a0aa4f64 1918 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1919 conn->data.resize(conn->qlen);
667f7e60
BH
1920 conn->bytesread=0;
1921 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1922 }
1923 if(!bytes || bytes < 0) {
bb4bdbaf 1924 t_fdm->removeReadFD(fd);
09e6702a
BH
1925 return;
1926 }
1927 }
667f7e60 1928 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 1929 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1930 if(bytes==1) {
667f7e60 1931 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1932 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1933 conn->data.resize(conn->qlen);
667f7e60 1934 conn->bytesread=0;
09e6702a
BH
1935 }
1936 if(!bytes || bytes < 0) {
1937 if(g_logCommonErrors)
e6a9dde5 1938 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1939 t_fdm->removeReadFD(fd);
09e6702a
BH
1940 return;
1941 }
1942 }
667f7e60 1943 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 1944 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
f9d67b41 1945 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1946 if(g_logCommonErrors) {
1947 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1948 }
bb4bdbaf 1949 t_fdm->removeReadFD(fd);
09e6702a
BH
1950 return;
1951 }
b841314c 1952 conn->bytesread+=(uint16_t)bytes;
667f7e60 1953 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1954 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1955
9a864da4 1956 std::unique_ptr<DNSComboWriter> dc;
09e6702a 1957 try {
9a864da4 1958 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 1959 }
16ce7f18 1960 catch(const MOADNSException &mde) {
3ddb9247 1961 g_stats.clientParseError++;
4957a608 1962 if(g_logCommonErrors)
e6a9dde5 1963 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1964 return;
09e6702a 1965 }
cd989c87
BH
1966 dc->d_tcpConnection = conn; // carry the torch
1967 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1968 dc->d_tcp=true;
5cc8371b
RG
1969 dc->setRemote(conn->d_remote);
1970 dc->setSource(conn->d_remote);
a6147cd2 1971 ComboAddress dest;
d38e2ba9 1972 dest.reset();
a6147cd2 1973 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1974 socklen_t len = dest.getSocklen();
1975 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1976 dc->setLocal(dest);
5cc8371b 1977 dc->setDestination(dest);
33dcceba
RG
1978 DNSName qname;
1979 uint16_t qtype=0;
1980 uint16_t qclass=0;
1981 bool needECS = false;
5cc8371b 1982 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1983 string requestorId;
590388d2 1984 string deviceId;
16bbc6e3 1985 bool logQuery = false;
aa7929a3 1986#ifdef HAVE_PROTOBUF
02b47f43 1987 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1988 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1989 needECS = true;
1990 }
b773359c 1991 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
b9fa43e0
OM
1992#endif /* HAVE_PROTOBUF */
1993
1994#ifdef HAVE_FSTRM
1995 checkFrameStreamExport(luaconfsLocal);
33dcceba
RG
1996#endif
1997
70fb28d9 1998 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
1999
2000 try {
29e6303a 2001 EDNSOptionViewMap ednsOptions;
5cc8371b 2002 bool xpfFound = false;
b40562da 2003 dc->d_ecsParsed = true;
5cc8371b 2004 dc->d_ecsFound = false;
2749c3fe 2005 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
2006 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2007 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 2008
70fb28d9 2009 if(t_pdl) {
33dcceba 2010 try {
70fb28d9 2011 if (t_pdl->d_gettag_ffi) {
f1c7929a 2012 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
2013 }
2014 else if (t_pdl->d_gettag) {
2015 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
2016 }
33dcceba 2017 }
70fb28d9 2018 catch(const std::exception& e) {
33dcceba 2019 if(g_logCommonErrors)
e6a9dde5 2020 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2021 }
2022 }
2023 }
70fb28d9 2024 catch(const std::exception& e)
33dcceba
RG
2025 {
2026 if(g_logCommonErrors)
e6a9dde5 2027 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2028 }
2029 }
f52177c3
RG
2030
2031 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2032
33dcceba 2033#ifdef HAVE_PROTOBUF
b773359c 2034 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 2035 dc->d_requestorId = requestorId;
590388d2 2036 dc->d_deviceId = deviceId;
d61aa945 2037 dc->d_uuid = getUniqueID();
4898a348 2038 }
02b47f43 2039
b773359c 2040 if(t_protobufServers) {
02b47f43 2041 try {
02b47f43 2042
845cbf4c 2043 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
b773359c 2044 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 2045 }
02b47f43
RG
2046 }
2047 catch(std::exception& e) {
2048 if(g_logCommonErrors)
e6a9dde5 2049 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
2050 }
2051 }
aa7929a3 2052#endif
5034517a
RG
2053 if(t_pdl) {
2054 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2055 if(!g_quiet)
2056 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2057 g_stats.policyDrops++;
2058 return;
2059 }
2060 }
2061
879b3f70 2062 if(dc->d_mdp.d_header.qr) {
048f5db6 2063 g_stats.ignoredCount++;
c0f9be19
RG
2064 if(g_logCommonErrors) {
2065 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2066 }
4957a608 2067 return;
879b3f70 2068 }
3abcdab2 2069 if(dc->d_mdp.d_header.opcode) {
048f5db6 2070 g_stats.ignoredCount++;
c0f9be19
RG
2071 if(g_logCommonErrors) {
2072 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2073 }
c0f9be19
RG
2074 return;
2075 }
2076 else if (dh->qdcount == 0) {
2077 g_stats.emptyQueriesCount++;
2078 if(g_logCommonErrors) {
2079 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2080 }
3abcdab2
PD
2081 return;
2082 }
09e6702a 2083 else {
4957a608
BH
2084 ++g_stats.qcounter;
2085 ++g_stats.tcpqcounter;
9a864da4 2086 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
4957a608 2087 return;
09e6702a
BH
2088 }
2089 }
2090 }
2091}
2092
6dcd28c3 2093//! Handle new incoming TCP connection
d187038c 2094static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2095{
37d3f960 2096 ComboAddress addr;
09e6702a 2097 socklen_t addrlen=sizeof(addr);
a683e8bd 2098 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2099 if(newsock>=0) {
85c32340
BH
2100 if(MT->numProcesses() > g_maxMThreads) {
2101 g_stats.overCapacityDrops++;
a7b68ae7
RG
2102 try {
2103 closesocket(newsock);
2104 }
2105 catch(const PDNSException& e) {
e6a9dde5 2106 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2107 }
85c32340
BH
2108 return;
2109 }
2110
92011b8f 2111 if(t_remotes)
2112 t_remotes->push_back(addr);
49a699c4 2113 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 2114 if(!g_quiet)
e6a9dde5 2115 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2116
09e6702a 2117 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2118 try {
2119 closesocket(newsock);
2120 }
2121 catch(const PDNSException& e) {
e6a9dde5 2122 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2123 }
09e6702a
BH
2124 return;
2125 }
bd0289fc 2126 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2127 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2128 try {
2129 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2130 }
2131 catch(const PDNSException& e) {
e6a9dde5 2132 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2133 }
09e6702a
BH
2134 return;
2135 }
3ddb9247 2136
3897b9e1 2137 setNonBlocking(newsock);
f26bf547 2138 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 2139 tc->state=TCPConnection::BYTE0;
3ddb9247 2140
27ae2e3c
RG
2141 struct timeval ttd;
2142 Utility::gettimeofday(&ttd, 0);
2143 ttd.tv_sec += g_tcpTimeout;
c038218b 2144
27ae2e3c 2145 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
09e6702a
BH
2146 }
2147}
3ddb9247 2148
d187038c 2149static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 2150{
183eb877 2151 gettimeofday(&g_now, 0);
c29d820c
RG
2152 if (tv.tv_sec) {
2153 struct timeval diff = g_now - tv;
2154 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2155
c29d820c
RG
2156 if(delta > 1000.0) {
2157 g_stats.tooOldDrops++;
2158 return nullptr;
2159 }
b71b60ee 2160 }
2161
1bc3c142 2162 ++g_stats.qcounter;
d7f10541
BH
2163 if(fromaddr.sin4.sin_family==AF_INET6)
2164 g_stats.ipv6qcounter++;
1bc3c142
BH
2165
2166 string response;
93f0da94 2167 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2168 unsigned int ctag=0;
f57486f1 2169 uint32_t qhash = 0;
12aff2e5 2170 bool needECS = false;
5cc8371b 2171 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 2172 std::vector<std::string> policyTags;
5fd2577f 2173 LuaContext::LuaObject data;
5cc8371b
RG
2174 ComboAddress source = fromaddr;
2175 ComboAddress destination = destaddr;
67e31ebe 2176 string requestorId;
590388d2 2177 string deviceId;
16bbc6e3 2178 bool logQuery = false;
12aff2e5 2179#ifdef HAVE_PROTOBUF
02b47f43 2180 boost::uuids::uuid uniqueId;
02b47f43 2181 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2182 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2183 uniqueId = getUniqueID();
02b47f43 2184 needECS = true;
63341e8d 2185 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2186 uniqueId = getUniqueID();
02b47f43 2187 }
b773359c
RG
2188 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2189 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2190#endif
2191#ifdef HAVE_FSTRM
2192 checkFrameStreamExport(luaconfsLocal);
12aff2e5 2193#endif
b40562da
RG
2194 EDNSSubnetOpts ednssubnet;
2195 bool ecsFound = false;
2196 bool ecsParsed = false;
08b02366
RG
2197 uint16_t ecsBegin = 0;
2198 uint16_t ecsEnd = 0;
70fb28d9
RG
2199 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2200 bool variable = false;
1bc3c142 2201 try {
02b47f43
RG
2202 DNSName qname;
2203 uint16_t qtype=0;
2204 uint16_t qclass=0;
1bc3c142 2205 uint32_t age;
c15ff3df 2206 bool qnameParsed=false;
8f7473d7 2207#ifdef MALLOC_TRACE
2208 /*
2209 static uint64_t last=0;
2210 if(!last)
2211 g_mtracer->clearAllocators();
2212 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2213 last=g_mtracer->getAllocs();
2214 cout<<g_mtracer->topAllocatorsString()<<endl;
2215 g_mtracer->clearAllocators();
2216 */
2217#endif
55a1378f 2218
70fb28d9 2219 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2220 try {
29e6303a 2221 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2222 bool xpfFound = false;
2223
2224 ecsFound = false;
2225
2226 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2227 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2228 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2229
c15ff3df
RG
2230 qnameParsed = true;
2231 ecsParsed = true;
12aff2e5 2232
70fb28d9 2233 if(t_pdl) {
12aff2e5 2234 try {
70fb28d9 2235 if (t_pdl->d_gettag_ffi) {
f1c7929a 2236 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
70fb28d9
RG
2237 }
2238 else if (t_pdl->d_gettag) {
2239 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2240 }
12aff2e5 2241 }
70fb28d9 2242 catch(const std::exception& e) {
12aff2e5 2243 if(g_logCommonErrors)
e6a9dde5 2244 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2245 }
8ea8c302 2246 }
b2eacd67 2247 }
70fb28d9 2248 catch(const std::exception& e)
b2eacd67 2249 {
2250 if(g_logCommonErrors)
e6a9dde5 2251 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2252 }
12ce523e 2253 }
3ddb9247 2254
02b47f43 2255 bool cacheHit = false;
1fbc6dc5 2256 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2257#ifdef HAVE_PROTOBUF
b773359c 2258 if (t_protobufServers) {
d362f7c1 2259 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2260 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2261 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
b773359c 2262 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
b790ef3d 2263 }
d9d3f9c1
RG
2264 }
2265#endif /* HAVE_PROTOBUF */
02b47f43 2266
70fb28d9
RG
2267 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2268 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2269 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2270 vState valState;
c15ff3df 2271 if (qnameParsed) {
08b02366 2272 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2273 }
2274 else {
08b02366 2275 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2276 }
2277
d9d3f9c1 2278 if (cacheHit) {
8467ec26
KM
2279 if(valState == Bogus) {
2280 if(t_bogusremotes)
2281 t_bogusremotes->push_back(source);
2282 if(t_bogusqueryring)
2283 t_bogusqueryring->push_back(make_pair(qname, qtype));
2284 }
2285
d9d3f9c1 2286#ifdef HAVE_PROTOBUF
b773359c 2287 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2288 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2289 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2290 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2291 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
c29d820c
RG
2292 if (g_useKernelTimestamp && tv.tv_sec) {
2293 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2294 }
2295 else {
2296 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2297 }
d362f7c1
RG
2298 pbMessage->setRequestorId(requestorId);
2299 pbMessage->setDeviceId(deviceId);
b773359c 2300 protobufLogResponse(*pbMessage);
02b47f43 2301 }
d9d3f9c1 2302#endif /* HAVE_PROTOBUF */
49a3500d 2303 if(!g_quiet)
e6a9dde5 2304 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2305
1bc3c142
BH
2306 g_stats.packetCacheHits++;
2307 SyncRes::s_queries++;
2308 ageDNSPacket(response, age);
b71b60ee 2309 struct msghdr msgh;
2310 struct iovec iov;
2311 char cbuf[256];
2312 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2313 msgh.msg_control=NULL;
2314
cbc03320 2315 if(g_fromtosockets.count(fd)) {
fbe2a2e0 2316 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 2317 }
cbc03320 2318 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2319 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2320
97bee66d 2321 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2322 struct dnsheader tmpdh;
2323 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2324 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2325 }
08f3f638 2326 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2327 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2328 return 0;
2329 }
3ddb9247 2330 }
1bc3c142 2331 catch(std::exception& e) {
e6a9dde5 2332 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2333 return 0;
2334 }
3ddb9247 2335
f26bf547 2336 if(t_pdl) {
5cc8371b 2337 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2338 if(!g_quiet)
e6a9dde5 2339 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2340 g_stats.policyDrops++;
2341 return 0;
2342 }
2343 }
2344
1bc3c142 2345 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2346 if(!g_quiet)
e6a9dde5 2347 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2348
1bc3c142
BH
2349 g_stats.overCapacityDrops++;
2350 return 0;
2351 }
3ddb9247 2352
9a864da4 2353 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
1bc3c142 2354 dc->setSocket(fd);
49a3500d 2355 dc->d_tag=ctag;
e9f63d47 2356 dc->d_qhash=qhash;
5cc8371b
RG
2357 dc->setRemote(fromaddr);
2358 dc->setSource(source);
b71b60ee 2359 dc->setLocal(destaddr);
5cc8371b 2360 dc->setDestination(destination);
1bc3c142 2361 dc->d_tcp=false;
b40562da
RG
2362 dc->d_ecsFound = ecsFound;
2363 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2364 dc->d_ecsBegin = ecsBegin;
2365 dc->d_ecsEnd = ecsEnd;
b40562da 2366 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2367 dc->d_ttlCap = ttlCap;
2368 dc->d_variable = variable;
aa7929a3 2369#ifdef HAVE_PROTOBUF
b773359c 2370 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2371 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2372 }
67e31ebe 2373 dc->d_requestorId = requestorId;
590388d2 2374 dc->d_deviceId = deviceId;
c29d820c 2375 dc->d_kernelTimestamp = tv;
aa7929a3
RG
2376#endif
2377
9a864da4 2378 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2379 return 0;
3ddb9247
PD
2380}
2381
b71b60ee 2382
d187038c 2383static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2384{
a683e8bd 2385 ssize_t len;
12c2f2b9 2386 static const size_t maxIncomingQuerySize = 512;
04896b99 2387 static thread_local std::string data;
5db529f8 2388 ComboAddress fromaddr;
b71b60ee 2389 struct msghdr msgh;
2390 struct iovec iov;
2391 char cbuf[256];
390f1dab 2392 bool firstQuery = true;
b71b60ee 2393
c0a00acd
RG
2394 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2395 data.resize(maxIncomingQuerySize);
2396 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2397 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2398
c0a00acd 2399 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2400
c0a00acd 2401 firstQuery = false;
390f1dab 2402
c0a00acd
RG
2403 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2404 g_stats.ignoredCount++;
2405 if (!g_quiet) {
2406 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2407 }
2408 return;
04896b99 2409 }
04896b99 2410
c0a00acd
RG
2411 if (msgh.msg_flags & MSG_TRUNC) {
2412 g_stats.truncatedDrops++;
2413 if (!g_quiet) {
2414 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2415 }
2416 return;
ba892c7f 2417 }
b23b8614 2418
c0a00acd
RG
2419 if(t_remotes) {
2420 t_remotes->push_back(fromaddr);
2421 }
81859ba5 2422
c0a00acd
RG
2423 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2424 if(!g_quiet) {
2425 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2426 }
3ddb9247 2427
c0a00acd
RG
2428 g_stats.unauthorizedUDP++;
2429 return;
5db529f8 2430 }
c0a00acd
RG
2431 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2432 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2433 if(!g_quiet) {
2434 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2435 }
2436
2437 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2438 return;
3abcdab2 2439 }
c0a00acd
RG
2440
2441 try {
2442 data.resize(static_cast<size_t>(len));
2443 dnsheader* dh=(dnsheader*)&data[0];
2444
2445 if(dh->qr) {
2446 g_stats.ignoredCount++;
2447 if(g_logCommonErrors) {
2448 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2449 }
2450 }
2451 else if(dh->opcode) {
2452 g_stats.ignoredCount++;
2453 if(g_logCommonErrors) {
2454 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2455 }
a6147cd2 2456 }
c0f9be19
RG
2457 else if (dh->qdcount == 0) {
2458 g_stats.emptyQueriesCount++;
2459 if(g_logCommonErrors) {
2460 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2461 }
2462 }
a6147cd2 2463 else {
c0a00acd
RG
2464 struct timeval tv={0,0};
2465 HarvestTimestamp(&msgh, &tv);
2466 ComboAddress dest;
2467 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2468 auto loc = rplookup(g_listenSocketsAddresses, fd);
2469 if(HarvestDestinationAddress(&msgh, &dest)) {
2470 // but.. need to get port too
2471 if(loc) {
2472 dest.sin4.sin_port = loc->sin4.sin_port;
2473 }
a6147cd2 2474 }
2475 else {
c0a00acd
RG
2476 if(loc) {
2477 dest = *loc;
2478 }
2479 else {
2480 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2481 socklen_t slen = dest.getSocklen();
2482 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2483 }
2484 }
2485
2486 if(g_weDistributeQueries) {
2487 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2488 }
2489 else {
144040be 2490 ++s_threadInfos[t_id].numberOfDistributedQueries;
c0a00acd 2491 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2492 }
2493 }
c0a00acd 2494 }
16ce7f18 2495 catch(const MOADNSException &mde) {
c0a00acd
RG
2496 g_stats.clientParseError++;
2497 if(g_logCommonErrors) {
2498 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2499 }
2500 }
2501 catch(const std::runtime_error& e) {
2502 g_stats.clientParseError++;
2503 if(g_logCommonErrors) {
2504 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2505 }
5db529f8
BH
2506 }
2507 }
c0a00acd
RG
2508 else {
2509 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2510 if(firstQuery && errno == EAGAIN) {
2511 g_stats.noPacketError++;
2512 }
390f1dab 2513
c0a00acd
RG
2514 break;
2515 }
ac0e821b 2516 }
5db529f8
BH
2517}
2518
adb6cd72 2519static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2520{
37d3f960 2521 int fd;
f28307ad 2522 vector<string>locals;
2e3d8a19 2523 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2524
f28307ad 2525 if(locals.empty())
3f81d239 2526 throw PDNSException("No local address specified");
3ddb9247 2527
f28307ad 2528 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2529 ServiceTuple st;
2530 st.port=::arg().asNum("local-port");
2531 parseService(*i, st);
3ddb9247 2532
32252594
BH
2533 ComboAddress sin;
2534
d38e2ba9 2535 sin.reset();
37d3f960 2536 sin.sin4.sin_family = AF_INET;
32252594 2537 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2538 sin.sin6.sin6_family = AF_INET6;
f71bc087 2539 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2540 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2541 }
2542
2543 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2544 if(fd<0)
3f81d239 2545 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2546
3897b9e1 2547 setCloseOnExec(fd);
a903b39c 2548
f28307ad 2549 int tmp=1;
810ff705 2550 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2551 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2552 exit(1);
f28307ad 2553 }
0dfa94ab 2554 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2555 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2556 }
2557
c8ddb7c2 2558#ifdef TCP_DEFER_ACCEPT
38ac0821 2559 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2560 if(i==locals.begin())
377602e3 2561 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2562 }
2563#endif
2564
fec7dd5a
SS
2565 if( ::arg().mustDo("non-local-bind") )
2566 Utility::setBindAny(AF_INET, fd);
2567
2332f42d 2568#ifdef SO_REUSEPORT
810ff705
RG
2569 if(g_reusePort) {
2570 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2571 throw PDNSException("SO_REUSEPORT: "+stringerror());
2572 }
2573#endif
2574
0735b17e
RG
2575 if (::arg().asNum("tcp-fast-open") > 0) {
2576#ifdef TCP_FASTOPEN
2577 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2578 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2579 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2580 }
2581#else
e6a9dde5 2582 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2583#endif
2584 }
2585
32252594 2586 sin.sin4.sin_port = htons(st.port);
a683e8bd 2587 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2588 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2589 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2590
3897b9e1 2591 setNonBlocking(fd);
49a699c4 2592 setSocketSendBuffer(fd, 65000);
37d3f960 2593 listen(fd, 128);
b243ca3b 2594 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2595 tcpSockets.insert(fd);
2596
84433b79 2597 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2598 // - fd is not that which we know here, but returned from accept()
3ddb9247 2599 if(sin.sin4.sin_family == AF_INET)
377602e3 2600 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2601 else
377602e3 2602 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2603 }
9c495589
BH
2604}
2605
b243ca3b 2606static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2607{
fec7dd5a 2608 int one=1;
f28307ad 2609 vector<string>locals;
2e3d8a19 2610 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2611
f28307ad 2612 if(locals.empty())
3f81d239 2613 throw PDNSException("No local address specified");
3ddb9247 2614
f28307ad 2615 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2616 ServiceTuple st;
2617 st.port=::arg().asNum("local-port");
2618 parseService(*i, st);
2619
37d3f960 2620 ComboAddress sin;
996c89cc 2621
d38e2ba9 2622 sin.reset();
37d3f960 2623 sin.sin4.sin_family = AF_INET;
32252594 2624 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2625 sin.sin6.sin6_family = AF_INET6;
f71bc087 2626 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2627 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2628 }
3ddb9247 2629
bb4bdbaf 2630 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2631 if(fd < 0) {
3f81d239 2632 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2633 }
915b0c39 2634 if (!setSocketTimestamps(fd))
e6a9dde5 2635 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2636
b71b60ee 2637 if(IsAnyAddress(sin)) {
cbc03320 2638 if(sin.sin4.sin_family == AF_INET)
2639 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2640 g_fromtosockets.insert(fd);
757d3179 2641#ifdef IPV6_RECVPKTINFO
cbc03320 2642 if(sin.sin4.sin_family == AF_INET6)
2643 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2644 g_fromtosockets.insert(fd);
757d3179 2645#endif
0dfa94ab 2646 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2647 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2648 }
b71b60ee 2649 }
fec7dd5a
SS
2650 if( ::arg().mustDo("non-local-bind") )
2651 Utility::setBindAny(AF_INET6, fd);
2652
3897b9e1 2653 setCloseOnExec(fd);
a903b39c 2654
4e9a20e6 2655 setSocketReceiveBuffer(fd, 250000);
32252594 2656 sin.sin4.sin_port = htons(st.port);
37d3f960 2657
2332f42d 2658
2573d4a6 2659#ifdef SO_REUSEPORT
810ff705 2660 if(g_reusePort) {
2332f42d 2661 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2662 throw PDNSException("SO_REUSEPORT: "+stringerror());
2663 }
2664#endif
90f9fbc0
RG
2665
2666 if (sin.isIPv4()) {
2667 try {
2668 setSocketIgnorePMTU(fd);
2669 }
2670 catch(const std::exception& e) {
2671 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2672 }
2673 }
2674
2675 socklen_t socklen=sin.getSocklen();
3ddb9247 2676 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2677 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2678
3897b9e1 2679 setNonBlocking(fd);
c2136bf0 2680
b243ca3b 2681 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2682 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2683 if(sin.sin4.sin_family == AF_INET)
377602e3 2684 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2685 else
377602e3 2686 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2687 }
c836dc19 2688}
caa6eefa 2689
d187038c 2690static void daemonize(void)
c836dc19
BH
2691{
2692 if(fork())
2693 exit(0); // bye bye
3ddb9247
PD
2694
2695 setsid();
c836dc19 2696
27a5ead5 2697 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2698 if(i < 0)
e6a9dde5 2699 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2700 else {
2701 dup2(i,0); /* stdin */
2702 dup2(i,1); /* stderr */
2703 dup2(i,2); /* stderr */
2704 close(i);
2705 }
288f4aa9 2706}
caa6eefa 2707
d187038c 2708static void usr1Handler(int)
c75a6a9e
BH
2709{
2710 statsWanted=true;
2711}
ae1b2e98 2712
d187038c 2713static void usr2Handler(int)
9170fbaf 2714{
f1f34cc2 2715 g_quiet= !g_quiet;
2716 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2717 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2718}
2719
d187038c 2720static void doStats(void)
c75a6a9e 2721{
16beeaa4
BH
2722 static time_t lastOutputTime;
2723 static uint64_t lastQueryCount;
d299d4f5 2724
2725 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2726 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2727
d299d4f5 2728 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2729 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2730 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2731 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2732 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2733
e6a9dde5 2734 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2735 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2736 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2737 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2738 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2739 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2740 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2741 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2742
e6a9dde5 2743 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2744 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2745
e6a9dde5 2746 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2747 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2748
144040be
RG
2749 size_t idx = 0;
2750 for (const auto& threadInfo : s_threadInfos) {
2751 if(threadInfo.isWorker) {
ad9fc3dc 2752 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
144040be
RG
2753 ++idx;
2754 }
2755 }
2756
16beeaa4
BH
2757 time_t now = time(0);
2758 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2759 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2760 }
2761 lastOutputTime = now;
2762 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2763 }
3ddb9247 2764 else if(statsWanted)
e6a9dde5 2765 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2766
c75a6a9e
BH
2767 statsWanted=false;
2768}
c836dc19 2769
29f0b1ce 2770static void houseKeeping(void *)
c836dc19 2771{
e4ae55e5 2772 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
3337c2f7
RG
2773 static thread_local int cleanCounter=0;
2774 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
2775 auto luaconfsLocal = g_luaconfs.getLocal();
2776
2777 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2778 // Loading the Lua config file already "refreshed" the TAs
2779 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2780 }
2781
cc59bce6 2782 try {
6b0d90ea 2783 if(s_running) {
cc59bce6 2784 return;
6b0d90ea 2785 }
cc59bce6 2786 s_running=true;
3ddb9247 2787
cc59bce6 2788 struct timeval now;
2789 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2790
2791 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
a6f7f5fe 2792 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2793 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2794
a6f7f5fe 2795 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2796
cc59bce6 2797 if(!((cleanCounter++)%40)) { // this is a full scan!
2798 time_t limit=now.tv_sec-300;
a712cb56 2799 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2800 }
2801 last_prune=time(0);
d67620e4 2802 }
3ddb9247 2803
cc59bce6 2804 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2805 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2806 if (!res)
2807 last_rootupdate=now.tv_sec;
cc59bce6 2808 }
3ddb9247 2809
b243ca3b 2810 if(isHandlerThread()) {
3ddb9247 2811
cc59bce6 2812 if(now.tv_sec - last_secpoll >= 3600) {
2813 try {
2814 doSecPoll(&last_secpoll);
2815 }
581d4ea3 2816 catch(std::exception& e)
2817 {
e6a9dde5 2818 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2819 }
47e9b74f 2820 catch(PDNSException& e)
2821 {
e6a9dde5 2822 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2823 }
d0992a65
CH
2824 catch(ImmediateServFailException &e)
2825 {
e6a9dde5 2826 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2827 }
47e9b74f 2828 catch(...)
2829 {
e6a9dde5 2830 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2831 }
18b73338 2832 }
e4ae55e5
PL
2833
2834 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2835 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2836 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2837 try {
2838 map<DNSName, dsmap_t> dsAnchors;
2839 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2840 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2841 lci.dsAnchors = dsAnchors;
2842 });
2843 }
2844 last_trustAnchorUpdate = now.tv_sec;
2845 } catch (const PDNSException &pe) {
2846 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2847 }
2848 }
d67620e4 2849 }
6b0d90ea 2850 s_running=false;
d67620e4 2851 }
cc59bce6 2852 catch(PDNSException& ae)
2853 {
2854 s_running=false;
e6a9dde5 2855 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2856 throw;
2857 }
779828c4 2858}
d6d5dea7 2859
d187038c 2860static void makeThreadPipes()
49a699c4 2861{
ee271fc4
RG
2862 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
2863 if (pipeBufferSize > 0) {
2864 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
2865 }
2866
b243ca3b
RG
2867 /* thread 0 is the handler / SNMP, we start at 1 */
2868 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2869 auto& threadInfos = s_threadInfos.at(n);
2870
49a699c4
BH
2871 int fd[2];
2872 if(pipe(fd) < 0)
2873 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2874
b243ca3b
RG
2875 threadInfos.pipes.readToThread = fd[0];
2876 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2877
49a699c4
BH
2878 if(pipe(fd) < 0)
2879 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2880
2881 threadInfos.pipes.readFromThread = fd[0];
2882 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2883
cf8cda18
RG
2884 if(pipe(fd) < 0)
2885 unixDie("Creating pipe for inter-thread communications");
d10307c5 2886
b243ca3b
RG
2887 threadInfos.pipes.readQueriesToThread = fd[0];
2888 threadInfos.pipes.writeQueriesToThread = fd[1];
2889
ee271fc4
RG
2890 if (pipeBufferSize > 0) {
2891 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
2892 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(errno)<<endl;
2893 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
2894 if (existingSize > 0) {
2895 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
2896 }
2897 }
2898 }
2899
b243ca3b 2900 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2901 unixDie("Making pipe for inter-thread communications non-blocking");
2902 }
49a699c4
BH
2903 }
2904}
2905
00c9b8c1
BH
2906struct ThreadMSG
2907{
2908 pipefunc_t func;
2909 bool wantAnswer;
2910};
2911
b4e76a18 2912void broadcastFunction(const pipefunc_t& func)
49a699c4 2913{
b243ca3b
RG
2914 /* This function might be called by the worker with t_id 0 during startup
2915 for the initialization of ACLs and domain maps. After that it should only
2916 be called by the handler. */
d77abca1 2917
b243ca3b
RG
2918 if (s_threadInfos.empty() && isHandlerThread()) {
2919 /* the handler and distributors will call themselves below, but
2920 during startup we get called while s_threadInfos has not been
2921 populated yet to update the ACL or domain maps, so we need to
2922 handle that case.
2923 */
2924 func();
2925 }
b4e76a18 2926
b243ca3b
RG
2927 unsigned int n = 0;
2928 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2929 if(n++ == t_id) {
b4e76a18 2930 func(); // don't write to ourselves!
49a699c4
BH
2931 continue;
2932 }
3ddb9247 2933
00c9b8c1
BH
2934 ThreadMSG* tmsg = new ThreadMSG();
2935 tmsg->func = func;
2936 tmsg->wantAnswer = true;
b243ca3b 2937 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2938 delete tmsg;
b243ca3b 2939
49a699c4 2940 unixDie("write to thread pipe returned wrong size or error");
b841314c 2941 }
3ddb9247 2942
49467864 2943 string* resp = nullptr;
b243ca3b 2944 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2945 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2946
49a699c4 2947 if(resp) {
49a699c4 2948 delete resp;
49467864 2949 resp = nullptr;
49a699c4
BH
2950 }
2951 }
2952}
06ea9015 2953
592d7ade 2954static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 2955{
144040be 2956 auto& targetInfo = s_threadInfos[target];
b243ca3b
RG
2957 if(!targetInfo.isWorker) {
2958 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2959 exit(1);
00c9b8c1 2960 }
d77abca1 2961
b243ca3b 2962 const auto& tps = targetInfo.pipes;
3ddb9247 2963
cf8cda18
RG
2964 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2965 if (written > 0) {
2966 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2967 delete tmsg;
2968 unixDie("write to thread pipe returned wrong size or error");
2969 }
2970 }
2971 else {
2972 int error = errno;
cf8cda18 2973 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 2974 return false;
cf8cda18 2975 } else {
592d7ade 2976 delete tmsg;
17634427 2977 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2978 }
b841314c 2979 }
592d7ade 2980
144040be
RG
2981 ++targetInfo.numberOfDistributedQueries;
2982
592d7ade
RG
2983 return true;
2984}
2985
144040be
RG
2986static unsigned int getWorkerLoad(size_t workerIdx)
2987{
2988 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
2989 if (mt != nullptr) {
2990 return mt->numProcesses();
2991 }
2992 return 0;
2993}
2994
2995static unsigned int selectWorker(unsigned int hash)
2996{
2997 if (s_balancingFactor == 0) {
2998 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
2999 }
3000
3001 /* we start with one, representing the query we are currently handling */
3002 double currentLoad = 1;
3003 std::vector<unsigned int> load(g_numWorkerThreads);
3004 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3005 load[idx] = getWorkerLoad(idx);
3006 currentLoad += load[idx];
3007 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3008 }
3009
3010 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3011 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3012
3013 unsigned int worker = hash % g_numWorkerThreads;
1b9d2d46 3014 /* at least one server has to be at or below the average load */
596bf482
RG
3015 if (load[worker] > targetLoad) {
3016 ++g_stats.rebalancedQueries;
3017 do {
3018 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3019 worker = (worker + 1) % g_numWorkerThreads;
3020 }
3021 while(load[worker] > targetLoad);
144040be
RG
3022 }
3023
3024 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3025}
3026
592d7ade
RG
3027// This function is only called by the distributor threads, when pdns-distributes-queries is set
3028void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3029{
3030 if (!isDistributorThread()) {
3031 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3032 exit(1);
3033 }
3034
3035 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
144040be 3036 unsigned int target = selectWorker(hash);
592d7ade
RG
3037
3038 ThreadMSG* tmsg = new ThreadMSG();
3039 tmsg->func = func;
3040 tmsg->wantAnswer = false;
3041
3042 if (!trySendingQueryToWorker(target, tmsg)) {
3043 /* if this function failed but did not raise an exception, it means that the pipe
3044 was full, let's try another one */
3045 unsigned int newTarget = 0;
3046 do {
3047 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3048 } while (newTarget == target);
3049
3050 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3051 g_stats.queryPipeFullDrops++;
3052 delete tmsg;
3053 }
3054 }
00c9b8c1 3055}
3427fa8a 3056
d187038c 3057static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 3058{
f26bf547 3059 ThreadMSG* tmsg = nullptr;
3ddb9247 3060
cf8cda18 3061 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
3062 unixDie("read from thread pipe returned wrong size or error");
3063 }
3ddb9247 3064
2f22827a 3065 void *resp=0;
3066 try {
3067 resp = tmsg->func();
3068 }
3069 catch(std::exception& e) {
6d2010a8 3070 if(g_logCommonErrors)
e6a9dde5 3071 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3072 }
3073 catch(PDNSException& e) {
6d2010a8 3074 if(g_logCommonErrors)
e6a9dde5 3075 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3076 }
d7c676a5 3077 if(tmsg->wantAnswer) {
b243ca3b
RG
3078 const auto& threadInfo = s_threadInfos.at(t_id);
3079 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 3080 delete tmsg;
00c9b8c1 3081 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
3082 }
3083 }
3ddb9247 3084
00c9b8c1 3085 delete tmsg;
49a699c4 3086}
09e6702a 3087
13034931
BH
3088template<class T> void *voider(const boost::function<T*()>& func)
3089{
3090 return func();
3091}
3092
b3b5459d
BH
3093vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3094{
3095 a.insert(a.end(), b.begin(), b.end());
3096 return a;
3097}
3098
/* Appends all (name, type) pairs of b to a and returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.begin(), b.end());
  return a;
}
3104
3ddb9247
PD
3105vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3106{
3107 a.insert(a.end(), b.begin(), b.end());
3108 return a;
3109}
3110
92011b8f 3111
387b9ca6
RG
3112/*
3113 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
3114 reload the Lua script (not the Lua config) or change the current trace regex,
3115 and by the SNMP thread to gather metrics. */
b4e76a18 3116template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 3117{
b243ca3b 3118 if (!isHandlerThread()) {
788eeb4c 3119 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 3120 exit(1);
d77abca1
RG
3121 }
3122
b243ca3b 3123 unsigned int n = 0;
3427fa8a 3124 T ret=T();
b243ca3b
RG
3125 for (const auto& threadInfo : s_threadInfos) {
3126 if (n++ == t_id) {
3127 continue;
3128 }
3129
3130 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
3131 ThreadMSG* tmsg = new ThreadMSG();
3132 tmsg->func = boost::bind(voider<T>, func);
3133 tmsg->wantAnswer = true;
3ddb9247 3134
b841314c
RG
3135 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3136 delete tmsg;
3427fa8a 3137 unixDie("write to thread pipe returned wrong size or error");
b841314c 3138 }
3ddb9247 3139
49467864 3140 T* resp = nullptr;
3427fa8a
BH
3141 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3142 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3143
3427fa8a 3144 if(resp) {
3427fa8a
BH
3145 ret += *resp;
3146 delete resp;
49467864 3147 resp = nullptr;
3427fa8a
BH
3148 }
3149 }
3150 return ret;
3151}
3152
b4e76a18
RG
3153template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3154template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3155template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3156template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
5ac6d761 3157template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3427fa8a 3158
d187038c 3159static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3160{
fbfc1809
RG
3161 try {
3162 string remote;
3163 string msg=s_rcc.recv(&remote);
3164 RecursorControlParser rcp;
3165 RecursorControlParser::func_t* command;
3ddb9247 3166
fbfc1809 3167 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0 3168
fbfc1809
RG
3169 // If we are inside a chroot, we need to strip
3170 if (!arg()["chroot"].empty()) {
3171 size_t len = arg()["chroot"].length();
3172 remote = remote.substr(len);
3173 }
f0f3f0b0 3174
ab5c053d
BH
3175 s_rcc.send(answer, &remote);
3176 command();
3177 }
fbfc1809 3178 catch(const std::exception& e) {
e6a9dde5 3179 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3180 }
fbfc1809 3181 catch(const PDNSException& ae) {
e6a9dde5 3182 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3183 }
09e6702a
BH
3184}
3185
d187038c 3186static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3187{
0b18b22e 3188 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 3189 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3190
667f7e60 3191 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3192
a683e8bd 3193 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3194 if(ret > 0) {
667f7e60 3195 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3196 pident->inNeeded-=(size_t)ret;
825fa717 3197 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3198 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3199 PacketID pid=*pident;
3200 string msg=pident->inMSG;
3ddb9247 3201
bb4bdbaf 3202 t_fdm->removeReadFD(fd);
3ddb9247 3203 MT->sendEvent(pid, &msg);
09e6702a
BH
3204 }
3205 else {
667f7e60 3206 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3207 }
3208 }
3209 else {
667f7e60 3210 PacketID tmp=*pident;
bb4bdbaf 3211 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3212 string empty;
3213 MT->sendEvent(tmp, &empty); // this conveys error status
3214 }
3215}
3216
d187038c 3217static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3218{
0b18b22e 3219 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3220 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3221 if(ret > 0) {
a683e8bd 3222 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3223 if(pid->outPos==pid->outMSG.size()) {
3224 PacketID tmp=*pid;
bb4bdbaf 3225 t_fdm->removeWriteFD(fd);
09e6702a
BH
3226 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3227 }
3228 }
3229 else { // error or EOF
667f7e60 3230 PacketID tmp(*pid);
bb4bdbaf 3231 t_fdm->removeWriteFD(fd);
09e6702a 3232 string sent;
998a4334 3233 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3234 }
3235}
3236
34801ab1 3237// resend event to everybody chained onto it
d187038c 3238static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3239{
3240 if(iter->key.chain.empty())
3241 return;
e27e91a8 3242 // cerr<<"doResends called!\n";
34801ab1
BH
3243 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3244 resend.fd=-1;
3245 resend.id=*i;
e27e91a8 3246 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3247
34801ab1
BH
3248 MT->sendEvent(resend, &content);
3249 g_stats.chainResends++;
34801ab1
BH
3250 }
3251}
3252
d187038c 3253static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3254{
600fc20b 3255 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3256 ssize_t len;
fae8fe07
RG
3257 std::string packet;
3258 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3259 ComboAddress fromaddr;
09e6702a
BH
3260 socklen_t addrlen=sizeof(fromaddr);
3261
fae8fe07 3262 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3263
a683e8bd 3264 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3265 if(len < 0)
996c89cc 3266 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3267 else {
3ddb9247 3268 g_stats.serverParseError++;
09e6702a 3269 if(g_logCommonErrors)
e6a9dde5 3270 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3271 ": packet smaller than DNS header"<<endl;
998a4334 3272 }
34801ab1 3273
49a699c4 3274 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3275 string empty;
3276
3277 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3278 if(iter != MT->d_waiters.end())
34801ab1 3279 doResends(iter, pid, empty);
3ddb9247 3280
34801ab1 3281 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3282 return;
3ddb9247 3283 }
998a4334 3284
fae8fe07 3285 packet.resize(len);
998a4334 3286 dnsheader dh;
fae8fe07 3287 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3288
6da3b3ad
PD
3289 PacketID pident;
3290 pident.remote=fromaddr;
3291 pident.id=dh.id;
3292 pident.fd=fd;
34801ab1 3293
33a928af 3294 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3295 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3296 }
3297
3298 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3299 !dh.qr) { // one weird server
3300 pident.domain.clear();
3301 pident.type = 0;
3302 }
3303 else {
3304 try {
0b31e67e 3305 if(len > 12)
fae8fe07 3306 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3307 }
3308 catch(std::exception& e) {
3309 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3310 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3311 return;
34801ab1 3312 }
6da3b3ad 3313 }
34801ab1 3314
6da3b3ad
PD
3315 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3316 if(iter != MT->d_waiters.end()) {
3317 doResends(iter, pident, packet);
3318 }
c1da7976 3319
6da3b3ad 3320retryWithName:
4957a608 3321
6da3b3ad
PD
3322 if(!MT->sendEvent(pident, &packet)) {
3323 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3324 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3325 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3326 pident.domain == mthread->key.domain) {
6da3b3ad 3327 mthread->key.nearMisses++;
998a4334 3328 }
6da3b3ad
PD
3329
3330 // be a bit paranoid here since we're weakening our matching
3ddb9247 3331 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3332 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3333 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3334 pident.domain = mthread->key.domain;
3335 pident.type = mthread->key.type;
3336 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3337 }
09e6702a 3338 }
6da3b3ad
PD
3339 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3340 if(g_logCommonErrors) {
e6a9dde5 3341 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3342 }
09e6702a 3343 }
6da3b3ad
PD
3344 else if(fd >= 0) {
3345 t_udpclientsocks->returnSocket(fd);
3346 }
09e6702a
BH
3347}
3348
1f4abb20
BH
3349FDMultiplexer* getMultiplexer()
3350{
3351 FDMultiplexer* ret;
f26bf547 3352 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3353 try {
f26bf547 3354 ret=i.second();
1f4abb20
BH
3355 return ret;
3356 }
98d0ee4a 3357 catch(FDMultiplexerException &fe) {
e6a9dde5 3358 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3359 }
3360 catch(...) {
e6a9dde5 3361 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3362 }
1f4abb20 3363 }
e6a9dde5 3364 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3365 exit(1);
3366}
3367
3ddb9247 3368
d187038c 3369static string* doReloadLuaScript()
4485aa35 3370{
674cf0f6 3371 string fname= ::arg()["lua-dns-script"];
4485aa35 3372 try {
674cf0f6 3373 if(fname.empty()) {
f26bf547 3374 t_pdl.reset();
377602e3 3375 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3376 return new string("unloaded\n");
4485aa35
BH
3377 }
3378 else {
9694e14f
AT
3379 t_pdl = std::make_shared<RecursorLua4>();
3380 t_pdl->loadFile(fname);
4485aa35
BH
3381 }
3382 }
fdbf35ac 3383 catch(std::exception& e) {
e6a9dde5 3384 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3385 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3386 }
3ddb9247 3387
e6a9dde5 3388 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3389 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3390}
3391
49a699c4
BH
3392string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3393{
3ddb9247 3394 if(begin != end)
49a699c4 3395 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3396
0f39c1a3 3397 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3398}
49a699c4 3399
d187038c 3400static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3401try
3402{
3403 if(newRegex.empty()) {
f26bf547 3404 t_traceRegex.reset();
77499b05
BH
3405 return new string("unset\n");
3406 }
3407 else {
f26bf547 3408 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3409 return new string("ok\n");
3410 }
3411}
3f81d239 3412catch(PDNSException& ae)
77499b05
BH
3413{
3414 return new string(ae.reason+"\n");
3415}
3416
3417string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3418{
3419 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3420}
3421
4e9a20e6 3422static void checkLinuxIPv6Limits()
3423{
3424#ifdef __linux__
3425 string line;
3426 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3427 int lim=std::stoi(line);
4e9a20e6 3428 if(lim < 16384) {
e6a9dde5 3429 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3430 }
3431 }
3432#endif
3433}
36849ff2 3434static void checkOrFixFDS()
4e9a20e6 3435{
c0063e60 3436 unsigned int availFDs=getFilenumLimit();
3437 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3438
3439 if(wantFDs > availFDs) {
067ad20e 3440 unsigned int hardlimit= getFilenumLimit(true);
3441 if(hardlimit >= wantFDs) {
c0063e60 3442 setFilenumLimit(wantFDs);
e6a9dde5 3443 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3444 }
3445 else {
067ad20e 3446 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3447 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3448 g_maxMThreads = newval;
067ad20e 3449 setFilenumLimit(hardlimit);
36849ff2 3450 }
3451 }
4e9a20e6 3452}
77499b05 3453
c390b2da 3454static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3455
f26bf547 3456static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3457{
3458 t_allowFrom = ng;
f26bf547 3459 return nullptr;
49a699c4
BH
3460}
3461
dbd23fc2
BH
// Command-line argument count and vector kept at file scope;
// parseACLs() hands them to ::arg().preParse() when re-reading settings.
int g_argc;
char** g_argv;
3464
18af64a8 3465void parseACLs()
f7c1d4e3 3466{
18af64a8 3467 static bool l_initialized;
3ddb9247 3468
49a699c4 3469 if(l_initialized) { // only reload configuration file on second call
18af64a8 3470 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3471 if(::arg()["config-name"]!="") {
3472 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3473 }
18af64a8 3474 cleanSlashes(configname);
3ddb9247
PD
3475
3476 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3477 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3478 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3479 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3480 ::arg().preParse(g_argc, g_argv, "include-dir");
3481
3482 // then process includes
3483 std::vector<std::string> extraConfigs;
242b90e1
AT
3484 ::arg().gatherIncludes(extraConfigs);
3485
1dc8f4d0 3486 for(const std::string& fn : extraConfigs) {
7e818521 3487 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3488 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3489 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3490 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3491 }
ca2c884c
AT
3492
3493 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3494 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3495 }
49a699c4 3496
f26bf547
RG
3497 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3498 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3499
2c95fc65
BH
3500 if(!::arg()["allow-from-file"].empty()) {
3501 string line;
2c95fc65
BH
3502 ifstream ifs(::arg()["allow-from-file"].c_str());
3503 if(!ifs) {
9c61b9d0 3504 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3505 }
3506
3507 string::size_type pos;
3508 while(getline(ifs,line)) {
3509 pos=line.find('#');
3510 if(pos!=string::npos)
3511 line.resize(pos);
3512 trim(line);
3513 if(line.empty())
3514 continue;
3515
18af64a8 3516 allowFrom->addMask(line);
2c95fc65 3517 }
e6a9dde5 3518 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3519 }
3520 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3521 vector<string> ips;
3522 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3523
e6a9dde5 3524 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3525 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3526 allowFrom->addMask(*i);
f7c1d4e3 3527 if(i!=ips.begin())
e6a9dde5
PL
3528 g_log<<Logger::Warning<<", ";
3529 g_log<<Logger::Warning<<*i;
f7c1d4e3 3530 }
e6a9dde5 3531 g_log<<Logger::Warning<<endl;
f7c1d4e3 3532 }
49a699c4 3533 else {
3ddb9247 3534 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3535 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3536 allowFrom = nullptr;
49a699c4 3537 }
3ddb9247 3538
49a699c4 3539 g_initialAllowFrom = allowFrom;
d7dae798 3540 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3541 oldAllowFrom = nullptr;
3ddb9247 3542
49a699c4 3543 l_initialized = true;
18af64a8
BH
3544}
3545
795215f2 3546
756e82cf 3547static void setupDelegationOnly()
3548{
3549 vector<string> parts;
3550 stringtok(parts, ::arg()["delegation-only"], ", \t");
3551 for(const auto& p : parts) {
9065eb05 3552 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3553 }
3554}
795215f2 3555
8fd25133
RG
3556static std::map<unsigned int, std::set<int> > parseCPUMap()
3557{
3558 std::map<unsigned int, std::set<int> > result;
3559
3560 const std::string value = ::arg()["cpu-map"];
3561
3562 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3563 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3564 return result;
3565 }
3566
3567 std::vector<std::string> parts;
3568
3569 stringtok(parts, value, " \t");
3570
3571 for(const auto& part : parts) {
3572 if (part.find('=') == string::npos)
3573 continue;
3574
3575 try {
3576 auto headers = splitField(part, '=');
3577 trim(headers.first);
3578 trim(headers.second);
3579
3580 unsigned int threadId = pdns_stou(headers.first);
3581 std::vector<std::string> cpus;
3582
3583 stringtok(cpus, headers.second, ",");
3584
3585 for(const auto& cpu : cpus) {
3586 int cpuId = std::stoi(cpu);
3587
3588 result[threadId].insert(cpuId);
3589 }
3590 }
3591 catch(const std::exception& e) {
e6a9dde5 3592 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3593 }
3594 }
3595
3596 return result;
3597}
3598
3599static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3600{
3601 const auto& cpuMapping = cpusMap.find(n);
3602 if (cpuMapping != cpusMap.cend()) {
3603 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3604 if (rc == 0) {
e6a9dde5 3605 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3606 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3607 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3608 }
e6a9dde5 3609 g_log<<Logger::Info<<endl;
8fd25133
RG
3610 }
3611 else {
e6a9dde5 3612 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3613 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3614 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3615 }
e6a9dde5 3616 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3617 }
3618 }
3619}
3620
af1377b7
NC
3621#ifdef NOD_ENABLED
3622static void setupNODThread()
3623{
3624 if (g_nodEnabled) {
b78727c6
NC
3625 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3626 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3627 try {
3628 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3629 }
3630 catch (const PDNSException& e) {
3631 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3632 _exit(1);
3633 }
3634 if (!t_nodDBp->init()) {
3635 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3636 _exit(1);
3637 }
41c542ec 3638 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 3639 t.detach();
ca2526f5 3640 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
3641 }
3642 if (g_udrEnabled) {
b78727c6
NC
3643 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3644 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3645 try {
3646 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3647 }
3648 catch (const PDNSException& e) {
3649 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3650 _exit(1);
3651 }
3652 if (!t_udrDBp->init()) {
3653 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3654 _exit(1);
3655 }
3656 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 3657 t.detach();
ca2526f5 3658 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
3659 }
3660}
3661
3662void parseNODWhitelist(const std::string& wlist)
3663{
3664 vector<string> parts;
3665 stringtok(parts, wlist, ",; ");
3666 for(const auto& a : parts) {
3667 g_nodDomainWL.add(DNSName(a));
3668 }
3669}
3670
3671static void setupNODGlobal()
3672{
3673 // Setup NOD subsystem
3674 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3675 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3676 g_nodLog = ::arg().mustDo("new-domain-log");
3677 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3678
3679 // Setup Unique DNS Response subsystem
3680 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3681 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3682}
3683#endif /* NOD_ENABLED */
3684
d187038c 3685static int serviceMain(int argc, char*argv[])
18af64a8 3686{
e6a9dde5
PL
3687 g_log.setName(s_programname);
3688 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3689 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3690
3691 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3692 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3693 if(val >= 0)
e6a9dde5 3694 g_log.setFacility(val);
18af64a8 3695 else
e6a9dde5 3696 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3697 }
3698
ba1a571d 3699 showProductVersion();
3afde9b2 3700
06ea9015 3701 g_disthashseed=dns_random(0xffffffff);
3702
b7ef5828
PL
3703 checkLinuxIPv6Limits();
3704 try {
3705 vector<string> addrs;
3706 if(!::arg()["query-local-address6"].empty()) {
3707 SyncRes::s_doIPv6=true;
e6a9dde5 3708 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3709
3710 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3711 for(const string& addr : addrs) {
3712 g_localQueryAddresses6.push_back(ComboAddress(addr));
3713 }
3714 }
3715 else {
e6a9dde5 3716 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3717 }
3718 addrs.clear();
3719 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3720 for(const string& addr : addrs) {
3721 g_localQueryAddresses4.push_back(ComboAddress(addr));
3722 }
3723 }
3724 catch(std::exception& e) {
e6a9dde5 3725 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3726 exit(99);
3727 }
3728
e48c6b8a
PL
3729 // keep this ABOVE loadRecursorLuaConfig!
3730 if(::arg()["dnssec"]=="off")
3731 g_dnssecmode=DNSSECMode::Off;
3732 else if(::arg()["dnssec"]=="process-no-validate")
3733 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3734 else if(::arg()["dnssec"]=="process")
3735 g_dnssecmode=DNSSECMode::Process;
3736 else if(::arg()["dnssec"]=="validate")
3737 g_dnssecmode=DNSSECMode::ValidateAll;
3738 else if(::arg()["dnssec"]=="log-fail")
3739 g_dnssecmode=DNSSECMode::ValidateForLog;
3740 else {
e6a9dde5 3741 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3742 exit(1);
3743 }
3744
9a3ab3e4
KM
3745 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3746 if (g_signatureInceptionSkew < 0) {
3747 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3748 exit(1);
3749 }
3750
e48c6b8a 3751 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3752 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3753
a6f7f5fe 3754 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3755 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3756
3757 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3758 try {
e6ec15bf 3759 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3760 }
3761 catch (PDNSException &e) {
e6a9dde5 3762 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3763 exit(1);
3764 }
ad42489c 3765
18af64a8 3766 parseACLs();
d6f3fcfa 3767 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 3768
eb5bae86 3769 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3770 vector<string> ips;
3771 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3772 ips.push_back("0.0.0.0");
3773 ips.push_back("::");
c36bc97a 3774
e6a9dde5 3775 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3776 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3777 SyncRes::addDontQuery(*i);
eb5bae86 3778 if(i!=ips.begin())
e6a9dde5
PL
3779 g_log<<Logger::Warning<<", ";
3780 g_log<<Logger::Warning<<*i;
eb5bae86 3781 }
e6a9dde5 3782 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3783 }
3784
f7c1d4e3 3785 g_quiet=::arg().mustDo("quiet");
3ddb9247 3786
b243ca3b 3787 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3788 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3789 if(g_weDistributeQueries) {
b243ca3b 3790 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3791 }
3ddb9247 3792
756e82cf 3793 setupDelegationOnly();
b33c2462 3794 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3795
77499b05
BH
3796 if(::arg()["trace"]=="fail") {
3797 SyncRes::setDefaultLogMode(SyncRes::Store);
3798 }
3799 else if(::arg().mustDo("trace")) {
3800 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3801 ::arg().set("quiet")="no";
3802 g_quiet=false;
3e9c6c0a 3803 g_dnssecLOG=true;
f7c1d4e3 3804 }
43a9b290
PL
3805 string myHostname = getHostname();
3806 if (myHostname == "UNKNOWN"){
3807 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3808 myHostname = "";
d0983bff 3809 }
3ddb9247 3810
aadceba8 3811 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
5cf4b2e7 3812 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
aadceba8 3813
1051f8a9
BH
3814 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3815
f7c1d4e3 3816 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
b9473937 3817 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
63637fd8 3818 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3819 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3820 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3821 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3822 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3823 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3824 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3825 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3826 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3827 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3828 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3829 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3830 if(SyncRes::s_serverID.empty()) {
d0983bff 3831 SyncRes::s_serverID = myHostname;
f7c1d4e3 3832 }
3ddb9247 3833
e9f9b8ec
RG
3834 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3835 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
c9783016 3836 SyncRes::clearECSStats();
fd8898fb 3837 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3838 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
ed9019c9 3839 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
e9f9b8ec 3840
8a3a3822
RG
3841 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3842 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3843 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3844 }
3845 else {
3846 bool found = false;
3847 for (const auto& addr : g_localQueryAddresses4) {
3848 if (!IsAnyAddress(addr)) {
3849 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3850 found = true;
3851 break;
3852 }
3853 }
3854 if (!found) {
3855 for (const auto& addr : g_localQueryAddresses6) {
3856 if (!IsAnyAddress(addr)) {
3857 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3858 found = true;
3859 break;
3860 }
3861 }
3862 if (!found) {
3863 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3864 }
3865 }
3866 }
3867
2fe3354d
CH
3868 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3869 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3870 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3871
5cc8371b 3872 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3873 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3874
5b0ddd18 3875 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3876
49a699c4 3877 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3878
08f3f638 3879 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3880
f7c1d4e3 3881 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3882 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3883
3884 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3885 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3886
b3adda56
PD
3887 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3888
b243ca3b 3889 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3890 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3891 if (g_numWorkerThreads < 1) {
e6a9dde5 3892 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3893 g_numWorkerThreads = 1;
3894 }
3895
b243ca3b 3896 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3897 g_maxMThreads = ::arg().asNum("max-mthreads");
3898
00b8cadc
RG
3899 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3900
0ec489bf 3901 g_statisticsInterval = ::arg().asNum("statistics-interval");
3902
559b6c93
PL
3903 {
3904 SuffixMatchNode dontThrottleNames;
3905 vector<string> parts;
3906 stringtok(parts, ::arg()["dont-throttle-names"]);
3907 for (const auto &p : parts) {
3908 dontThrottleNames.add(DNSName(p));
3909 }
3910 g_dontThrottleNames.setState(dontThrottleNames);
3911
3912 NetmaskGroup dontThrottleNetmasks;
3913 stringtok(parts, ::arg()["dont-throttle-netmasks"]);
3914 for (const auto &p : parts) {
3915 dontThrottleNetmasks.addMask(Netmask(p));
3916 }
3917 g_dontThrottleNetmasks.setState(dontThrottleNetmasks);
3918 }
3919
144040be 3920 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
078be17f
RG
3921 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
3922 s_balancingFactor = 0.0;
3923 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
3924 }
144040be 3925
810ff705
RG
3926#ifdef SO_REUSEPORT
3927 g_reusePort = ::arg().mustDo("reuseport");
3928#endif
3929
b243ca3b 3930 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3931
b243ca3b
RG
3932 if (g_reusePort) {
3933 if (g_weDistributeQueries) {
3934 /* first thread is the handler, then distributors */
3935 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3936 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3937 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3938 makeUDPServerSockets(deferredAdds);
adb6cd72 3939 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3940 }
3941 }
3942 else {
3943 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3944 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3945 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3946 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3947 makeUDPServerSockets(deferredAdds);
adb6cd72 3948 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3949 }
810ff705
RG
3950 }
3951 }
3952 else {
c47f201b 3953 std::set<int> tcpSockets;
b243ca3b
RG
3954 /* we don't have reuseport so we can only open one socket per
3955 listening addr:port and everyone will listen on it */
3956 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3957 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3958
3959 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3960 needs to listen to the shared sockets */
3961 if (g_weDistributeQueries) {
3962 /* first thread is the handler, then distributors */
3963 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3964 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3965 }
3966 }
3967 else {
3968 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3969 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3970 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3971 }
3972 }
810ff705 3973 }
815099b2 3974
af1377b7
NC
3975#ifdef NOD_ENABLED
3976 // Setup newly observed domain globals
3977 setupNODGlobal();
3978#endif /* NOD_ENABLED */
3979
677e2a46
BH
3980 int forks;
3981 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3982 if(!fork()) // we are child
3983 break;
3984 }
3ddb9247 3985
f7c1d4e3 3986 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3987 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3988 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
3989 daemonize();
3990 }
3991 signal(SIGUSR1,usr1Handler);
3992 signal(SIGUSR2,usr2Handler);
3993 signal(SIGPIPE,SIG_IGN);
810ff705 3994
a6414fdc 3995 checkOrFixFDS();
3ddb9247 3996
d1b28475
KM
3997#ifdef HAVE_LIBSODIUM
3998 if (sodium_init() == -1) {
e6a9dde5 3999 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
4000 exit(99);
4001 }
4002#endif
4003
3afde9b2
PL
4004 openssl_thread_setup();
4005 openssl_seed();
e97cb679
AT
4006 /* setup rng before chroot */
4007 dns_random_init();
3afde9b2 4008
bdbb07e0 4009 if(::arg()["server-id"].empty()) {
d0983bff 4010 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
4011 }
4012
138435cb
BH
4013 int newgid=0;
4014 if(!::arg()["setgid"].empty())
4015 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
4016 int newuid=0;
4017 if(!::arg()["setuid"].empty())
4018 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
4019
f1d6a7ce
KM
4020 Utility::dropGroupPrivs(newuid, newgid);
4021
138435cb 4022 if (!::arg()["chroot"].empty()) {
75336810
PL
4023#ifdef HAVE_SYSTEMD
4024 char *ns;
4025 ns = getenv("NOTIFY_SOCKET");
4026 if (ns != nullptr) {
e6a9dde5 4027 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
4028 exit(1);
4029 }
4030#endif
138435cb 4031 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 4032 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
4033 exit(1);
4034 }
f0f3f0b0 4035 else
377602e3 4036 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
4037 }
4038
f0f3f0b0
PL
4039 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4040 if(!s_pidfname.empty())
4041 unlink(s_pidfname.c_str()); // remove possible old pid file
4042 writePid();
4043
4044 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4045
f1d6a7ce 4046 Utility::dropUserPrivs(newuid);
1f2b341e
RG
4047 try {
4048 /* we might still have capabilities remaining, for example if we have been started as root
4049 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4050 like CAP_NET_BIND_SERVICE.
4051 */
4052 dropCapabilities();
4053 }
4054 catch(const std::exception& e) {
4055 g_log<<Logger::Warning<<e.what()<<endl;
4056 }
c0063e60 4057
e6ec15bf
RG
4058 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4059
49a699c4 4060 makeThreadPipes();
3ddb9247 4061
5d4dd7fe
BH
4062 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4063 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 4064 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 4065 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 4066
c29d820c
RG
4067 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4068
563517f3
RG
4069 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4070 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4071 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4072 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
72259676 4073
d705aad9
RG
4074 if (::arg().mustDo("snmp-agent")) {
4075 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4076 g_snmpAgent->run();
4077 }
4078
b47026fd 4079 int port = ::arg().asNum("udp-source-port-min");
58da9034 4080 if(port < 1024 || port > 65535){
e6a9dde5 4081 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
4082 exit(99); // this isn't going to fix itself either
4083 }
4084 s_minUdpSourcePort = port;
b47026fd 4085 port = ::arg().asNum("udp-source-port-max");
58da9034 4086 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 4087 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
4088 exit(99); // this isn't going to fix itself either
4089 }
4090 s_maxUdpSourcePort = port;
4091 std::vector<string> parts {};
b47026fd 4092 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
4093 for (const auto &part : parts)
4094 {
4095 port = std::stoi(part);
58da9034 4096 if(port < 1024 || port > 65535){
e6a9dde5 4097 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
4098 exit(99); // this isn't going to fix itself either
4099 }
4100 s_avoidUdpSourcePorts.insert(port);
4101 }
4102
b243ca3b 4103 unsigned int currentThreadId = 1;
8fd25133 4104 const auto cpusMap = parseCPUMap();
d77abca1 4105
c3828c03 4106 if(g_numThreads == 1) {
e6a9dde5 4107 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
4108#ifdef HAVE_SYSTEMD
4109 sd_notify(0, "READY=1");
4110#endif
b243ca3b
RG
4111
4112 /* This thread handles the web server, carbon, statistics and the control channel */
4113 auto& handlerInfos = s_threadInfos.at(0);
4114 handlerInfos.isHandler = true;
c390b2da 4115 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
4116
4117 setCPUMap(cpusMap, currentThreadId, pthread_self());
4118
4119 auto& infos = s_threadInfos.at(currentThreadId);
4120 infos.isListener = true;
4121 infos.isWorker = true;
c390b2da 4122 recursorThread(currentThreadId++, "worker");
76698c6e
BH
4123 }
4124 else {
8fd25133 4125
b243ca3b
RG
4126 if (g_weDistributeQueries) {
4127 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4128 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4129 auto& infos = s_threadInfos.at(currentThreadId);
4130 infos.isListener = true;
c390b2da 4131 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
4132
4133 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4134 }
4135 }
8fd25133 4136
62b549e0
RG
4137 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4138
b243ca3b
RG
4139 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4140 auto& infos = s_threadInfos.at(currentThreadId);
4141 infos.isListener = g_weDistributeQueries ? false : true;
4142 infos.isWorker = true;
c390b2da 4143 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b
RG
4144
4145 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 4146 }
b243ca3b 4147
6b6720de
PL
4148#ifdef HAVE_SYSTEMD
4149 sd_notify(0, "READY=1");
4150#endif
b243ca3b
RG
4151
4152 /* This thread handles the web server, carbon, statistics and the control channel */
4153 auto& infos = s_threadInfos.at(0);
4154 infos.isHandler = true;
c390b2da 4155 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b
RG
4156
4157 s_threadInfos.at(0).thread.join();
bb4bdbaf 4158 }
bb4bdbaf
BH
4159 return 0;
4160}
4161
c390b2da 4162static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
4163try
4164{
d77abca1 4165 t_id=n;
b243ca3b 4166 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
4167
4168 static string threadPrefix = "pdns-r/";
519f5484 4169 setThreadName(threadPrefix + threadName);
c390b2da 4170
49a699c4 4171 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 4172 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 4173 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
4174 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4175 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 4176 primeHints();
3ddb9247 4177
f26bf547 4178 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 4179
e6a9dde5 4180 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 4181
af1377b7 4182#ifdef NOD_ENABLED
41c542ec
NC
4183 if (threadInfo.isWorker)
4184 setupNODThread();
af1377b7 4185#endif /* NOD_ENABLED */
c1751a59
RG
4186
4187 /* the listener threads handle TCP queries */
4188 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
4189 try {
4190 if(!::arg()["lua-dns-script"].empty()) {
4191 t_pdl = std::make_shared<RecursorLua4>();
4192 t_pdl->loadFile(::arg()["lua-dns-script"]);
4193 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4194 }
4195 }
4196 catch(std::exception &e) {
4197 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4198 _exit(99);
674cf0f6 4199 }
674cf0f6 4200 }
3ddb9247 4201
f8f243b0 4202 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 4203 if(ringsize) {
f26bf547 4204 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
4205 if(g_weDistributeQueries)
4206 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 4207 else
3ddb9247 4208 t_remotes->set_capacity(ringsize);
f26bf547 4209 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4210 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
4211 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4212 t_bogusremotes->set_capacity(ringsize);
f26bf547 4213 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4214 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 4215 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4216 t_timeouts->set_capacity(ringsize);
92011b8f 4217
f26bf547 4218 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4219 t_queryring->set_capacity(ringsize);
f26bf547 4220 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4221 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
4222 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4223 t_bogusqueryring->set_capacity(ringsize);
92011b8f 4224 }
3ddb9247 4225
f26bf547 4226 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
144040be 4227 threadInfo.mt = MT.get();
3ddb9247 4228
63341e8d
RG
4229#ifdef HAVE_PROTOBUF
4230 /* start protobuf export threads if needed */
4231 auto luaconfsLocal = g_luaconfs.getLocal();
4232 checkProtobufExport(luaconfsLocal);
4233 checkOutgoingProtobufExport(luaconfsLocal);
4234#endif /* HAVE_PROTOBUF */
b9fa43e0
OM
4235#ifdef HAVE_FSTRM
4236 checkFrameStreamExport(luaconfsLocal);
4237#endif
63341e8d 4238
bb4bdbaf
BH
4239 PacketID pident;
4240
4241 t_fdm=getMultiplexer();
d77abca1 4242
b243ca3b 4243 if(threadInfo.isHandler) {
d07bf7ff 4244 if(::arg().mustDo("webserver")) {
e6a9dde5 4245 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 4246 try {
1ce57618 4247 new RecursorWebServer(t_fdm);
8989097d
CH
4248 }
4249 catch(PDNSException &e) {
e6a9dde5 4250 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
4251 exit(99);
4252 }
f3d1d67b 4253 }
377602e3 4254 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 4255 }
810ff705 4256 else {
d77abca1 4257
b243ca3b
RG
4258 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4259 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4260
4261 if (threadInfo.isListener) {
4262 if (g_reusePort) {
4263 /* then every listener has its own FDs */
4264 for(const auto deferred : threadInfo.deferredAdds) {
4265 t_fdm->addReadFD(deferred.first, deferred.second);
4266 }
810ff705 4267 }
b243ca3b
RG
4268 else {
4269 /* otherwise all listeners are listening on the same ones */
4270 for(const auto deferred : g_deferredAdds) {
4271 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4272 }
4273 }
4274 }
810ff705 4275 }
3ddb9247 4276
b0b37121 4277 registerAllStats();
d77abca1 4278
b243ca3b 4279 if(threadInfo.isHandler) {
674cf0f6
BH
4280 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4281 }
1bc3c142 4282
f7c1d4e3 4283 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4284
f7c1d4e3 4285 bool listenOnTCP(true);
49a699c4 4286
cb1523d1 4287 time_t last_stat = 0;
a2f87dd1 4288 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4289 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4290 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 4291 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 4292 for(;;) {
ac0e821b 4293 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4294
3427fa8a
BH
4295 if(!(counter%500)) {
4296 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4297 }
4298
d2392145 4299 if(!(counter%55)) {
d8f6d49f 4300 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4301 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4302
f7c1d4e3 4303 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 4304 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4305 if(g_logCommonErrors)
e6a9dde5 4306 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4307 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4308 }
4309 }
3ddb9247 4310
f7c1d4e3
BH
4311 counter++;
4312
b243ca3b 4313 if(threadInfo.isHandler) {
cb1523d1
RG
4314 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4315 doStats();
4316 last_stat = g_now.tv_sec;
4317 }
f7c1d4e3 4318
cb1523d1 4319 Utility::gettimeofday(&g_now, 0);
2c78bd57 4320
cb1523d1
RG
4321 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4322 MT->makeThread(doCarbonDump, 0);
4323 last_carbon = g_now.tv_sec;
4324 }
2c78bd57 4325 }
2a0276a9 4326 if (t_pdl != nullptr) {
9adbe790 4327 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
4328 /* remember that the listener threads handle TCP queries */
4329 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
4330 // Only on threads processing queries
4331 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4332 t_pdl->maintenance();
4333 last_lua_maintenance = g_now.tv_sec;
4334 }
9adbe790 4335 }
a2f87dd1 4336 }
2c78bd57 4337
bb4bdbaf 4338 t_fdm->run(&g_now);
3ea54bf0 4339 // 'run' updates g_now for us
f7c1d4e3 4340
b243ca3b 4341 if(threadInfo.isListener) {
5c889cf5 4342 if(listenOnTCP) {
c47f201b
RG
4343 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4344 for(const auto fd : threadInfo.tcpSockets) {
4345 t_fdm->removeReadFD(fd);
b243ca3b 4346 }
c47f201b
RG
4347 listenOnTCP=false;
4348 }
f7c1d4e3 4349 }
5c889cf5 4350 else {
c47f201b
RG
4351 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4352 for(const auto fd : threadInfo.tcpSockets) {
4353 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4354 }
c47f201b
RG
4355 listenOnTCP=true;
4356 }
f7c1d4e3
BH
4357 }
4358 }
4359 }
4360}
3f81d239 4361catch(PDNSException &ae) {
e6a9dde5 4362 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4363 return 0;
4364}
4365catch(std::exception &e) {
e6a9dde5 4366 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4367 return 0;
4368}
4369catch(...) {
e6a9dde5 4370 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4371 return 0;
4372}
4373
51e2144e 4374
3ddb9247 4375int main(int argc, char **argv)
288f4aa9 4376{
dbd23fc2
BH
4377 g_argc = argc;
4378 g_argv = argv;
5e3de507 4379 g_stats.startupTime=time(0);
b51ef4f9 4380 Utility::srandom();
3e135495 4381 versionSetProduct(ProductRecursor);
8a63d3ce 4382 reportBasicTypes();
0007c2e5 4383 reportOtherTypes();
ea634573 4384
22030c37 4385 int ret = EXIT_SUCCESS;
caa6eefa 4386
288f4aa9 4387 try {
f888311c 4388 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4389 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4390 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4391 ::arg().set("local-port","port to listen on")="53";
32252594 4392 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4393 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4394 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4395 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4396 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4397 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4398 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4399 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4400 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4401 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4402 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4403 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
4404 ::arg().set("chroot","switch to chroot jail")="";
4405 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4406 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 4407 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4408 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4409 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4410 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4411 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4412 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4413 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4414 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4415 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4416 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4417 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4418 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
8ca656a8 4419 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
cc08b5a9 4420 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4421 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4422 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4423 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4424 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4425
0ec489bf 4426 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4427 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4428 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4429 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4430 ::arg().set("socket-owner","Owner of socket")="";
4431 ::arg().set("socket-group","Group of socket")="";
4432 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4433
f0f3f0b0 4434 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
4435 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4436 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4437 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4438 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4439 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4440 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 4441 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4442 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
559b6c93
PL
4443 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4444 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
2e3d8a19 4445 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4446 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4447 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
b9473937 4448 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
c3e753c7 4449 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4450 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4451 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4452 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4453 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4454 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4455 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4456 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4457 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4458 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4459 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4460 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4461 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4462 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4463 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4464 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4465 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 4466
5605c067 4467 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4468 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4469 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4470 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4471 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4472 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4473 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4474 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4475 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4476 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4477 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4478 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
fd8898fb 4479 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
35695d18 4480 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
fd8898fb 4481 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5cf4b2e7 4482 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
ed9019c9 4483 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
3f975863 4484 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4485 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4486 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4487 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4488 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4489 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4490 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4491 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4492 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
4493 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4494 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 4495 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4496 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4497 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4498 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4499 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
c29d820c 4500 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
ee271fc4 4501 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
a09a8ce0 4502
68e6df3c 4503 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4504 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4505
4506 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4507
d705aad9 4508 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4509 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4510
72259676
RG
4511 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4512 for (size_t idx = 0; idx < 32; idx++) {
4513 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4514 }
4515 for (size_t idx = 0; idx < 128; idx++) {
4516 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4517 }
563517f3
RG
4518 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4519 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4520 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4521 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
d705aad9 4522
0735b17e 4523 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4524 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4525
8fd25133
RG
4526 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4527
98d36505
RG
4528 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4529
5cc8371b 4530 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4531 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4532
58da9034 4533 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4534 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4535 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4536 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 4537 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
144040be 4538 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
af1377b7
NC
4539#ifdef NOD_ENABLED
4540 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4541 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4542 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4543 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4544 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4545 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4546 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
4547 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4548 ::arg().set("unique-response-log", "Log unique responses")="yes";
4549 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4550 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4551 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 4552#endif /* NOD_ENABLED */
2e3d8a19 4553 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4554 ::arg().setCmd("version","Print version string");
d5141417 4555 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 4556 g_log.toConsole(Logger::Info);
2e3d8a19 4557 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4558
2d733c0f
CH
4559 string configname=::arg()["config-dir"]+"/recursor.conf";
4560 if(::arg()["config-name"]!="") {
4561 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4562 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4563 }
4564 cleanSlashes(configname);
5124de27 4565
5cc1ea1d
CH
4566 if(!::arg().getCommands().empty()) {
4567 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4568 exit(99);
4569 }
4570
577cf284
BH
4571 if(::arg().mustDo("config")) {
4572 cout<<::arg().configstring()<<endl;
4573 exit(0);
4574 }
4575
3ddb9247 4576 if(!::arg().file(configname.c_str()))
e6a9dde5 4577 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4578
2e3d8a19 4579 ::arg().parse(argc,argv);
c836dc19 4580
2054afbb
CH
4581 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4582 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4583 exit(EXIT_FAILURE);
4584 }
4585
4586 if (::arg()["socket-dir"].empty()) {
4587 if (::arg()["chroot"].empty())
4588 ::arg().set("socket-dir") = LOCALSTATEDIR;
4589 else
4590 ::arg().set("socket-dir") = "/";
4591 }
4592
2e3d8a19 4593 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4594
b243ca3b
RG
4595 if(::arg().asNum("threads")==1) {
4596 if (::arg().mustDo("pdns-distributes-queries")) {
4597 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4598 ::arg().set("pdns-distributes-queries")="no";
4599 }
4600 }
4601
4602 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4603 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4604 ::arg().set("distributor-threads")="1";
4605 }
4606
4607 if (!::arg().mustDo("pdns-distributes-queries")) {
4608 ::arg().set("distributor-threads")="0";
4609 }
61d74169 4610
2e3d8a19 4611 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4612 cout<<"syntax:"<<endl<<endl;
4613 cout<<::arg().helpstring(::arg()["help"])<<endl;
4614 exit(0);
b636533b 4615 }
5e3de507 4616 if(::arg().mustDo("version")) {
ba1a571d 4617 showProductVersion();
3613a51c 4618 showBuildConfiguration();
67076869 4619 exit(0);
5e3de507 4620 }
b636533b 4621
34162f8f 4622 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4623
34162f8f
CH
4624 if (logUrgency < Logger::Error)
4625 logUrgency = Logger::Error;
f48d7b65 4626 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4627 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4628 }
e6a9dde5
PL
4629 g_log.setLoglevel(logUrgency);
4630 g_log.toConsole(logUrgency);
34162f8f 4631
f7c1d4e3 4632 serviceMain(argc, argv);
288f4aa9 4633 }
3f81d239 4634 catch(PDNSException &ae) {
e6a9dde5 4635 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4636 ret=EXIT_FAILURE;
288f4aa9 4637 }
fdbf35ac 4638 catch(std::exception &e) {
e6a9dde5 4639 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4640 ret=EXIT_FAILURE;
288f4aa9
BH
4641 }
4642 catch(...) {
e6a9dde5 4643 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4644 ret=EXIT_FAILURE;
288f4aa9 4645 }
3ddb9247 4646
22030c37 4647 return ret;
288f4aa9 4648}