]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Qname minimization.
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
d6f3fcfa 91#include "pubsuffix.hh"
af1377b7
NC
92#ifdef NOD_ENABLED
93#include "nod.hh"
94#endif /* NOD_ENABLED */
f3c18728 95
d9d3f9c1 96#include "rec-protobuf.hh"
d705aad9 97#include "rec-snmp.hh"
aa7929a3 98
6b6720de
PL
99#ifdef HAVE_SYSTEMD
100#include <systemd/sd-daemon.h>
101#endif
102
d187038c
RG
103#include "namespaces.hh"
104
d61aa945
RG
105#ifdef HAVE_PROTOBUF
106#include "uuid-utils.hh"
b9fa43e0 107#endif /* HAVE_PROTOBUF */
d61aa945 108
5cc8371b
RG
109#include "xpf.hh"
110
d187038c
RG
111typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
f26bf547 113static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 114static thread_local unsigned int t_id = 0;
f26bf547
RG
115static thread_local std::shared_ptr<Regex> t_traceRegex;
116static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 117#ifdef HAVE_PROTOBUF
3fe06137 118static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 119static thread_local uint64_t t_protobufServersGeneration;
3fe06137 120static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 121static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 122#endif /* HAVE_PROTOBUF */
f26bf547 123
b9fa43e0 124#ifdef HAVE_FSTRM
10ba6d01 125static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
b9fa43e0
OM
126static thread_local uint64_t t_frameStreamServersGeneration;
127#endif /* HAVE_FSTRM */
128
f26bf547
RG
129thread_local std::unique_ptr<MT_t> MT; // the big MTasker
130thread_local std::unique_ptr<MemRecursorCache> t_RC;
131thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 132thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 133thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 134thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 135thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
136#ifdef NOD_ENABLED
137thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 138thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 139#endif /* NOD_ENABLED */
d187038c 140__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 141
b243ca3b
RG
142typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
143
d7dae798 144// for communicating with our threads
b243ca3b
RG
145// effectively readonly after startup
146struct RecThreadInfo
147{
148 struct ThreadPipeSet
149 {
150 int writeToThread{-1};
151 int readToThread{-1};
152 int writeFromThread{-1};
153 int readFromThread{-1};
154 int writeQueriesToThread{-1}; // this one is non-blocking
155 int readQueriesToThread{-1};
156 };
157
adb6cd72 158 /* FD corresponding to TCP sockets this thread is listening
c47f201b 159 on.
adb6cd72
RG
160 These FDs are also in deferredAdds when we have one
161 socket per listener, and in g_deferredAdds instead. */
162 std::set<int> tcpSockets;
b243ca3b
RG
163 /* FD corresponding to listening sockets if we have one socket per
164 listener (with reuseport), otherwise all listeners share the
165 same FD and g_deferredAdds is then used instead */
166 deferredAdd_t deferredAdds;
167 struct ThreadPipeSet pipes;
168 std::thread thread;
144040be
RG
169 MT_t* mt{nullptr};
170 uint64_t numberOfDistributedQueries{0};
b243ca3b
RG
171 /* handle the web server, carbon, statistics and the control channel */
172 bool isHandler{false};
173 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
174 bool isListener{false};
175 /* process queries */
176 bool isWorker{false};
49a699c4 177};
810ff705 178
b243ca3b
RG
179/* first we have the handler thread, t_id == 0 (some other
180 helper threads like SNMP might have t_id == 0 as well)
181 then the distributor threads if any
182 and finally the workers */
183static std::vector<RecThreadInfo> s_threadInfos;
184/* without reuseport, all listeners share the same sockets */
185static deferredAdd_t g_deferredAdds;
faf580f5 186
d187038c
RG
187typedef vector<int> tcpListenSockets_t;
188typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 189
d187038c 190static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 191static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
192static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
193static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
194static AtomicCounter counter;
9065eb05 195static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 196static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 197static NetmaskGroup g_XPFAcl;
d187038c 198static size_t g_tcpMaxQueriesPerConn;
a5886e6a 199static size_t s_maxUDPQueriesPerRound;
d187038c
RG
200static uint64_t g_latencyStatSize;
201static uint32_t g_disthashseed;
202static unsigned int g_maxTCPPerClient;
d187038c 203static unsigned int g_maxMThreads;
b243ca3b 204static unsigned int g_numDistributorThreads;
d187038c
RG
205static unsigned int g_numWorkerThreads;
206static int g_tcpTimeout;
207static uint16_t g_udpTruncationThreshold;
59cb4a79 208static uint16_t g_xpfRRCode{0};
d187038c
RG
209static std::atomic<bool> statsWanted;
210static std::atomic<bool> g_quiet;
211static bool g_logCommonErrors;
212static bool g_anyToTcp;
b243ca3b 213static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 214static bool g_reusePort{false};
00b8cadc 215static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 216static time_t g_statisticsInterval;
9065eb05 217static bool g_useIncomingECS;
c29d820c 218static bool g_useKernelTimestamp;
a6f7f5fe 219std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
220#ifdef NOD_ENABLED
221static bool g_nodEnabled;
222static DNSName g_nodLookupDomain;
223static bool g_nodLog;
224static SuffixMatchNode g_nodDomainWL;
ca2526f5 225static std::string g_nod_pbtag;
41c542ec
NC
226static bool g_udrEnabled;
227static bool g_udrLog;
ca2526f5 228static std::string g_udr_pbtag;
af1377b7 229#endif /* NOD_ENABLED */
f097141b 230#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 231static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
232#else
233static std::set<uint16_t> s_avoidUdpSourcePorts;
234#endif
bf6f28ca
CHB
235static uint16_t s_minUdpSourcePort;
236static uint16_t s_maxUdpSourcePort;
144040be 237static double s_balancingFactor;
49a699c4 238
b243ca3b 239RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 240RecursorStats g_stats;
2d733c0f 241string s_programname="pdns_recursor";
d187038c 242string s_pidfname;
c1c29961 243bool g_lowercaseOutgoing;
bf19ccfd 244unsigned int g_networkTimeoutMsec;
d187038c
RG
245unsigned int g_numThreads;
246uint16_t g_outgoingEDNSBufsize;
98d36505 247bool g_logRPZChanges{false};
c3828c03 248
559b6c93
PL
249// Used in the Syncres to not throttle certain servers
250GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
251GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
252
// Each list is written as an IPv4 literal followed by an IPv6 literal;
// adjacent string literals are concatenated by the compiler.
#define LOCAL_NETS \
  "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, " \
  "::1/128, fc00::/7, fe80::/10"
#define LOCAL_NETS_INVERSE \
  "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, " \
  "!::1/128, !fc00::/7, !fe80::/10"
// Bad nets are taken from both
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// wherever such a network may not be considered a valid destination.
#define BAD_NETS \
  "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, " \
  "::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 262
d7dae798 263//! used to send information to a newborn mthread
ea634573 264struct DNSComboWriter {
08b02366 265 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
266 {
267 }
5cc8371b 268
08b02366 269 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
270 {
271 }
272
5cc8371b
RG
273 void setRemote(const ComboAddress& sa)
274 {
275 d_remote=sa;
276 }
277
278 void setSource(const ComboAddress& sa)
ea634573 279 {
5cc8371b 280 d_source=sa;
ea634573
BH
281 }
282
b71b60ee 283 void setLocal(const ComboAddress& sa)
284 {
285 d_local=sa;
286 }
287
5cc8371b
RG
288 void setDestination(const ComboAddress& sa)
289 {
290 d_destination=sa;
291 }
b71b60ee 292
ea634573
BH
293 void setSocket(int sock)
294 {
295 d_socket=sock;
296 }
a1754c6a
BH
297
298 string getRemote() const
299 {
5cc8371b
RG
300 if (d_source == d_remote) {
301 return d_source.toStringWithPort();
302 }
303 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
304 }
305
5cc8371b 306 MOADNSParser d_mdp;
c9e9e5e0 307 struct timeval d_now;
5cc8371b
RG
308 /* Remote client, might differ from d_source
309 in case of XPF, in which case d_source holds
310 the IP of the client and d_remote of the proxy
311 */
312 ComboAddress d_remote;
313 ComboAddress d_source;
314 /* Destination address, might differ from
315 d_destination in case of XPF, in which case
316 d_destination holds the IP of the proxy and
317 d_local holds our own. */
318 ComboAddress d_local;
319 ComboAddress d_destination;
aa7929a3
RG
320#ifdef HAVE_PROTOBUF
321 boost::uuids::uuid d_uuid;
67e31ebe 322 string d_requestorId;
590388d2 323 string d_deviceId;
c29d820c 324 struct timeval d_kernelTimestamp{0,0};
aa7929a3 325#endif
08b02366 326 std::string d_query;
5164bac3
RG
327 std::vector<std::string> d_policyTags;
328 LuaContext::LuaObject d_data;
b40562da 329 EDNSSubnetOpts d_ednssubnet;
5164bac3 330 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 331 int d_socket;
b673817a 332 unsigned int d_tag{0};
e9f63d47 333 uint32_t d_qhash{0};
70fb28d9 334 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
335 uint16_t d_ecsBegin{0};
336 uint16_t d_ecsEnd{0};
70fb28d9 337 bool d_variable{false};
5164bac3
RG
338 bool d_ecsFound{false};
339 bool d_ecsParsed{false};
340 bool d_tcp;
ea634573
BH
341};
342
06857845
RG
343MT_t* getMT()
344{
345 return MT ? MT.get() : nullptr;
346}
ea634573 347
288f4aa9
BH
348ArgvMap &arg()
349{
350 static ArgvMap theArg;
351 return theArg;
352}
4ef015cd 353
8fb594ba 354unsigned int getRecursorThreadId()
b4015453 355{
30da2030 356 return t_id;
b4015453 357}
09e6702a 358
30ee601a
RG
359int getMTaskerTID()
360{
361 return MT->getTid();
362}
363
b243ca3b
RG
364static bool isDistributorThread()
365{
366 if (t_id == 0) {
367 return false;
368 }
369
370 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
371}
372
373static bool isHandlerThread()
374{
375 if (t_id == 0) {
376 return true;
377 }
378
379 return s_threadInfos.at(t_id).isHandler;
380}
381
d187038c 382static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 383
50c81227 384// -1 is error, 0 is timeout, 1 is success
3ddb9247 385int asendtcp(const string& data, Socket* sock)
5c633640
BH
386{
387 PacketID pident;
388 pident.sock=sock;
389 pident.outMSG=data;
3ddb9247 390
bb4bdbaf 391 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 392 string packet;
5c633640 393
5b0ddd18 394 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 395
9170fbaf 396 if(!ret || ret==-1) { // timeout
bb4bdbaf 397 t_fdm->removeWriteFD(sock->getHandle());
5c633640 398 }
50c81227
BH
399 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
400 return -1;
401 }
9170fbaf 402 return ret;
5c633640
BH
403}
404
d187038c 405static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 406
9170fbaf 407// -1 is error, 0 is timeout, 1 is success
a683e8bd 408int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 409{
50c81227 410 data.clear();
5c633640
BH
411 PacketID pident;
412 pident.sock=sock;
413 pident.inNeeded=len;
825fa717 414 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 415 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 416
bb4bdbaf 417 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 418 if(!ret || ret==-1) { // timeout
bb4bdbaf 419 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 420 }
50c81227
BH
421 else if(data.empty()) {// error, EOF or other
422 return -1;
423 }
424
9170fbaf 425 return ret;
288f4aa9
BH
426}
427
d187038c 428static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 429{
fba1e944 430 PacketID pident=*any_cast<PacketID>(&var);
4465e941 431 char resp[512];
7c77ce63
RG
432 ComboAddress fromaddr;
433 socklen_t addrlen=sizeof(fromaddr);
434
435 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
436 if (fromaddr != pident.remote) {
e6a9dde5 437 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
438
439 }
440
4465e941 441 t_fdm->removeReadFD(fd);
442 if(ret >= 0) {
a683e8bd 443 string data(resp, (size_t) ret);
fba1e944 444 MT->sendEvent(pident, &data);
4465e941 445 }
446 else {
fba1e944 447 string empty;
448 MT->sendEvent(pident, &empty);
449 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 450 }
451}
fba1e944 452string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 453{
4465e941 454 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
455 s.setNonBlocking();
456 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
457
458 s.bind(local);
459 s.connect(dest);
4465e941 460 s.send(query);
461
462 PacketID pident;
463 pident.sock=&s;
7c77ce63 464 pident.remote=dest;
4465e941 465 pident.type=0;
fba1e944 466 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 467
468 string data;
fba1e944 469
4465e941 470 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 471
4465e941 472 if(!ret || ret==-1) { // timeout
4465e941 473 t_fdm->removeReadFD(s.getHandle());
474 }
475 else if(data.empty()) {// error, EOF or other
fba1e944 476 // we could special case this
4465e941 477 return data;
478 }
4465e941 479 return data;
480}
481
d7dae798 482//! pick a random query local address
1652a63e 483ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 484{
1652a63e 485 ComboAddress ret;
5a38281c 486 if(family==AF_INET) {
3ddb9247 487 if(g_localQueryAddresses4.empty())
1652a63e 488 ret = g_local4;
3ddb9247 489 else
1652a63e
BH
490 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
491 ret.sin4.sin_port = htons(port);
5a38281c
BH
492 }
493 else {
494 if(g_localQueryAddresses6.empty())
1652a63e
BH
495 ret = g_local6;
496 else
497 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 498
1652a63e 499 ret.sin6.sin6_port = htons(port);
5a38281c 500 }
1652a63e 501 return ret;
5a38281c 502}
4ef015cd 503
d187038c 504static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 505
d187038c 506static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
507{
508 uint32_t psize=0;
509 socklen_t len=sizeof(psize);
3ddb9247 510
d7dae798 511 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 512 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 513 return;
d7dae798
BH
514 }
515
516 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 517 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
518}
519
520
521static void setSocketReceiveBuffer(int fd, uint32_t size)
522{
523 setSocketBuffer(fd, SO_RCVBUF, size);
524}
525
526static void setSocketSendBuffer(int fd, uint32_t size)
527{
528 setSocketBuffer(fd, SO_SNDBUF, size);
529}
530
531
4ef015cd
BH
532// you can ask this class for a UDP socket to send a query from
533// this socket is not yours, don't even think about deleting it
534// but after you call 'returnSocket' on it, don't assume anything anymore
535class UDPClientSocks
536{
4ef015cd 537 unsigned int d_numsocks;
4ef015cd 538public:
e2642526 539 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
540 {
541 }
542
2ee280cf 543 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 544 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 545 {
d8f6d49f
BH
546 *fd=makeClientSocket(toaddr.sin4.sin_family);
547 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 548 return -2;
d8f6d49f
BH
549
550 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
551 int err = errno;
a7b68ae7
RG
552 try {
553 closesocket(*fd);
554 }
555 catch(const PDNSException& e) {
e6a9dde5 556 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
557 }
558
d8f6d49f 559 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 560 return -2;
998a4334 561 return -1;
d8f6d49f 562 }
998a4334 563
998a4334 564 d_numsocks++;
d8f6d49f 565 return 0;
4ef015cd
BH
566 }
567
568 // return a socket to the pool, or simply erase it
2bee9b7c 569 void returnSocket(int fd)
4ef015cd 570 {
80baf329 571 try {
2bee9b7c 572 t_fdm->removeReadFD(fd);
80baf329 573 }
2bee9b7c 574 catch(const FDMultiplexerException& e) {
bb4bdbaf 575 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 576 }
2bee9b7c 577
a7b68ae7 578 try {
2bee9b7c 579 closesocket(fd);
a7b68ae7
RG
580 }
581 catch(const PDNSException& e) {
e6a9dde5 582 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 583 }
3ddb9247 584
998a4334 585 --d_numsocks;
4ef015cd 586 }
d8f6d49f 587
2bee9b7c
RG
588private:
589
d8f6d49f 590 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 591 static int makeClientSocket(int family)
d8f6d49f 592 {
a683e8bd 593 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 594
d8f6d49f
BH
595 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
596 return ret;
3ddb9247
PD
597
598 if(ret<0)
335da0ba 599 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 600
7eb73ffa 601 // setCloseOnExec(ret); // we're not going to exec
5a38281c 602
d8f6d49f 603 int tries=10;
3aa91c3e 604 ComboAddress sin;
d8f6d49f 605 while(--tries) {
1652a63e 606 uint16_t port;
3ddb9247 607
d8f6d49f 608 if(tries==1) // fall back to kernel 'random'
4957a608 609 port = 0;
bf6f28ca
CHB
610 else {
611 do {
612 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
613 }
614 while (s_avoidUdpSourcePorts.count(port));
615 }
5a38281c 616
3aa91c3e 617 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 618
3ddb9247 619 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 620 break;
d8f6d49f 621 }
9ec48f21
RG
622
623 if(!tries) {
624 closesocket(ret);
3aa91c3e 625 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
9ec48f21
RG
626 }
627
628 try {
629 setReceiveSocketErrors(ret, family);
630 setNonBlocking(ret);
631 }
632 catch(...) {
633 closesocket(ret);
634 throw;
635 }
3ddb9247 636
d8f6d49f
BH
637 return ret;
638 }
49a699c4
BH
639};
640
f26bf547 641static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 642
288f4aa9 643/* these two functions are used by LWRes */
34801ab1 644// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 645int asendto(const char *data, size_t len, int flags,
3ddb9247 646 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 647{
34801ab1
BH
648
649 PacketID pident;
787e5eab
BH
650 pident.domain = domain;
651 pident.remote = toaddr;
652 pident.type = qtype;
34801ab1
BH
653
654 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
655 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
656
657 for(; chain.first != chain.second; chain.first++) {
658 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 659 /*
4665c31e
BH
660 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
661 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 662 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 663 */
34801ab1
BH
664 chain.first->key.chain.insert(id); // we can chain
665 *fd=-1; // gets used in waitEvent / sendEvent later on
666 return 1;
667 }
668 }
669
49a699c4 670 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
671 if(ret < 0)
672 return ret;
34801ab1 673
998a4334
BH
674 pident.fd=*fd;
675 pident.id=id;
3ddb9247 676
bb4bdbaf
BH
677 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
678 ret = send(*fd, data, len, 0);
679
5b0ddd18 680 int tmp = errno;
bb4bdbaf 681
7302ed0a 682 if(ret < 0)
49a699c4 683 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 684
5b0ddd18 685 errno = tmp; // this is for logging purposes only
7302ed0a 686 return ret;
288f4aa9
BH
687}
688
9170fbaf 689// -1 is error, 0 is timeout, 1 is success
f128d20d 690int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 691 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 692{
0d5f0a9f 693 static optional<unsigned int> nearMissLimit;
3ddb9247 694 if(!nearMissLimit)
0d5f0a9f
BH
695 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
696
288f4aa9 697 PacketID pident;
4ef015cd 698 pident.fd=fd;
288f4aa9 699 pident.id=id;
0d5f0a9f 700 pident.domain=domain;
787e5eab 701 pident.type = qtype;
996c89cc 702 pident.remote=fromaddr;
b636533b 703
5b0ddd18 704 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 705
9ec48f21 706 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
9170fbaf 707 if(ret > 0) {
9ec48f21 708 /* handleUDPServerResponse() will close the socket for us no matter what */
996c89cc 709 if(packet.empty()) // means "error"
3ddb9247 710 return -1;
998a4334 711
a683e8bd 712 *d_len=packet.size();
f128d20d 713
0d5f0a9f 714 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 715 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 716 g_stats.spoofCount++;
35ce8576
BH
717 return -1;
718 }
288f4aa9 719 }
09e6702a 720 else {
9ec48f21 721 /* getting there means error or timeout, it's up to us to close the socket */
34801ab1 722 if(fd >= 0)
49a699c4 723 t_udpclientsocks->returnSocket(fd);
09e6702a 724 }
9170fbaf 725 return ret;
288f4aa9
BH
726}
727
88def049
BH
728static void writePid(void)
729{
191f2e47 730 if(!::arg().mustDo("write-pid"))
731 return;
18e7758c 732 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 733 if(of)
705f31ae 734 of<< Utility::getpid() <<endl;
88def049 735 else
e6a9dde5 736 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
737}
738
2749c3fe 739TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
740{
741 ++s_currentConnections;
cd989c87 742 (*t_tcpClientCounts)[d_remote]++;
0e408828 743}
cd989c87
BH
744
745TCPConnection::~TCPConnection()
0e408828 746{
a7b68ae7
RG
747 try {
748 if(closesocket(d_fd) < 0)
e6a9dde5 749 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
750 }
751 catch(const PDNSException& e) {
e6a9dde5 752 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
753 }
754
3ddb9247 755 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 756 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 757 --s_currentConnections;
0e408828 758}
0e9d9ce2 759
3ddb9247 760AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
761
762static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 763
92011b8f 764// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 765static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 766{
92011b8f 767 if(packetsize > 1000 && t_largeanswerremotes)
768 t_largeanswerremotes->push_back(remote);
2cc13433
BH
769 switch(res) {
770 case RCode::ServFail:
92011b8f 771 if(t_servfailremotes) {
772 t_servfailremotes->push_back(remote);
5af86fdc 773 if(query && t_servfailqueryring) // packet cache
92011b8f 774 t_servfailqueryring->push_back(make_pair(*query, qtype));
775 }
2cc13433
BH
776 g_stats.servFails++;
777 break;
778 case RCode::NXDomain:
779 g_stats.nxDomains++;
780 break;
781 case RCode::NoError:
782 g_stats.noErrors++;
783 break;
784 }
785}
786
9a864da4 787static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 788try
789{
5cc8371b 790 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 791}
792catch(...)
793{
794 return "Exception making error message for exception";
795}
796
aa7929a3 797#ifdef HAVE_PROTOBUF
b773359c 798static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 799{
b773359c
RG
800 if (!t_protobufServers) {
801 return;
802 }
803
e1c8a4bb
RG
804 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
805 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
806 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 807 message.setServerIdentity(SyncRes::s_serverID);
a94bc5d7 808 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 809 message.setRequestorId(requestorId);
590388d2 810 message.setDeviceId(deviceId);
02b47f43 811
02b47f43 812 if (!policyTags.empty()) {
d9d3f9c1 813 message.setPolicyTags(policyTags);
02b47f43 814 }
aa7929a3 815
d9d3f9c1 816// cerr <<message.toDebugString()<<endl;
aa7929a3 817 std::string str;
d9d3f9c1 818 message.serialize(str);
b773359c
RG
819
820 for (auto& server : *t_protobufServers) {
821 server->queueData(str);
822 }
aa7929a3
RG
823}
824
b773359c 825static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 826{
b773359c
RG
827 if (!t_protobufServers) {
828 return;
829 }
830
d9d3f9c1 831// cerr <<message.toDebugString()<<endl;
aa7929a3 832 std::string str;
d9d3f9c1 833 message.serialize(str);
b773359c
RG
834
835 for (auto& server : *t_protobufServers) {
836 server->queueData(str);
837 }
aa7929a3
RG
838}
839#endif
840
53508135
PL
841/**
842 * Chases the CNAME provided by the PolicyCustom RPZ policy.
843 *
844 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
845 * @param qtype: The QType of the original query
846 * @param sr: A SyncRes
847 * @param res: An integer that will contain the RCODE of the lookup we do
848 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
849 */
d187038c 850static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
851{
852 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
853 bool oldWantsRPZ = sr.getWantsRPZ();
854 sr.setWantsRPZ(false);
53508135 855 vector<DNSRecord> ans;
6da513b2 856 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
857 for (const auto& rec : ans) {
858 if(rec.d_place == DNSResourceRecord::ANSWER) {
859 ret.push_back(rec);
860 }
861 }
862 // Reset the RPZ state of the SyncRes
30ee601a 863 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
864 }
865}
866
70fb28d9 867static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 868{
70fb28d9 869 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
870
871 if(rec.d_type != QType::OPT) // their TTL ain't real
872 minTTL = min(minTTL, rec.d_ttl);
873
874 rec.d_content->toPacket(pw);
875 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
876 pw.rollback();
877 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
878 pw.getHeader()->tc=1;
879 pw.truncate();
880 }
881 return false;
882 }
883
884 return true;
885}
886
63341e8d 887#ifdef HAVE_PROTOBUF
3fe06137 888static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 889{
3fe06137 890 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
891
892 for (const auto& server : config.servers) {
893 try {
da71b63b 894 result->emplace_back(new RemoteLogger(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
b773359c
RG
895 }
896 catch(const std::exception& e) {
897 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
898 }
899 catch(const PDNSException& e) {
900 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
901 }
63341e8d
RG
902 }
903
904 return result;
905}
906
907static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
908{
909 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
910 if (t_protobufServers) {
911 for (auto& server : *t_protobufServers) {
912 server->stop();
913 }
914 t_protobufServers.reset();
63341e8d
RG
915 }
916
917 return false;
918 }
919
920 /* if the server was not running, or if it was running according to a
921 previous configuration */
b773359c
RG
922 if (!t_protobufServers ||
923 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 924
b773359c
RG
925 if (t_protobufServers) {
926 for (auto& server : *t_protobufServers) {
927 server->stop();
928 }
63341e8d 929 }
b773359c 930 t_protobufServers.reset();
63341e8d 931
b773359c
RG
932 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
933 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
934 }
935
936 return true;
937}
938
939static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
940{
941 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
942 if (t_outgoingProtobufServers) {
943 for (auto& server : *t_outgoingProtobufServers) {
944 server->stop();
945 }
63341e8d 946 }
b773359c 947 t_outgoingProtobufServers.reset();
63341e8d
RG
948
949 return false;
950 }
951
952 /* if the server was not running, or if it was running according to a
953 previous configuration */
b773359c
RG
954 if (!t_outgoingProtobufServers ||
955 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 956
b773359c
RG
957 if (t_outgoingProtobufServers) {
958 for (auto& server : *t_outgoingProtobufServers) {
959 server->stop();
960 }
63341e8d 961 }
b773359c 962 t_outgoingProtobufServers.reset();
63341e8d 963
b773359c
RG
964 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
965 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
966 }
967
968 return true;
969}
b9fa43e0
OM
970
971#ifdef HAVE_FSTRM
972
10ba6d01 973static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
b9fa43e0 974{
10ba6d01 975 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
b9fa43e0
OM
976
977 for (const auto& server : config.servers) {
978 try {
573f4ff0
OM
979 std::unordered_map<string,unsigned> options;
980 options["bufferHint"] = config.bufferHint;
981 options["flushTimeout"] = config.flushTimeout;
982 options["inputQueueSize"] = config.inputQueueSize;
983 options["outputQueueSize"] = config.outputQueueSize;
984 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
985 options["reopenInterval"] = config.reopenInterval;
dea8a6bc
OM
986 FrameStreamLogger *fsl = nullptr;
987 try {
988 ComboAddress address(server);
989 fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
990 }
991 catch (const PDNSException& e) {
992 fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
993 }
573f4ff0
OM
994 fsl->setLogQueries(config.logQueries);
995 fsl->setLogResponses(config.logResponses);
996 result->emplace_back(fsl);
b9fa43e0
OM
997 }
998 catch(const std::exception& e) {
999 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
1000 }
1001 catch(const PDNSException& e) {
1002 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
1003 }
1004 }
1005
1006 return result;
1007}
1008
1009static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1010{
1011 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1012 if (t_frameStreamServers) {
1013 // dt's take care of cleanup
1014 t_frameStreamServers.reset();
1015 }
1016
1017 return false;
1018 }
1019
1020 /* if the server was not running, or if it was running according to a
1021 previous configuration */
1022 if (!t_frameStreamServers ||
1023 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1024
1025 if (t_frameStreamServers) {
1026 // dt's take care of cleanup
1027 t_frameStreamServers.reset();
1028 }
1029
1030 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1031 t_frameStreamServersGeneration = luaconfsLocal->generation;
1032 }
1033
1034 return true;
1035}
1036#endif /* HAVE_FSTRM */
63341e8d
RG
1037#endif /* HAVE_PROTOBUF */
1038
af1377b7 1039#ifdef NOD_ENABLED
41c542ec 1040static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
1041{
1042 static const QType qt(QType::A);
1043 static const uint16_t qc(QClass::IN);
41c542ec 1044 bool ret = false;
af1377b7
NC
1045 // First check the (sub)domain isn't whitelisted for NOD purposes
1046 if (!g_nodDomainWL.check(dname)) {
1047 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
1048 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1049 if (g_nodLog) {
1050 // This should probably log to a dedicated log file
1051 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
1052 }
1053 if (!(g_nodLookupDomain.isRoot())) {
1054 // Send a DNS A query to <domain>.g_nodLookupDomain
1055 DNSName qname = dname;
1056 vector<DNSRecord> dummy;
1057 qname += g_nodLookupDomain;
1058 directResolve(qname, qt, qc, dummy);
1059 }
41c542ec 1060 ret = true;
af1377b7
NC
1061 }
1062 }
41c542ec 1063 return ret;
af1377b7
NC
1064}
1065
1066static void nodAddDomain(const DNSName& dname)
1067{
1068 // Don't bother adding domains on the nod whitelist
1069 if (!g_nodDomainWL.check(dname)) {
1070 if (t_nodDBp) {
1071 // This keeps the nod info up to date
1072 t_nodDBp->addDomain(dname);
1073 }
1074 }
1075}
41c542ec
NC
1076
1077static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1078{
1079 bool ret = false;
1080 if (record.d_place == DNSResourceRecord::ANSWER ||
1081 record.d_place == DNSResourceRecord::ADDITIONAL) {
1082 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1083 std::stringstream ss;
1084 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1085 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1086 if (g_udrLog) {
1087 // This should also probably log to a dedicated file.
1088 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1089 }
1090 ret = true;
1091 }
1092 }
1093 return ret;
1094}
af1377b7
NC
1095#endif /* NOD_ENABLED */
1096
d187038c 1097static void startDoResolve(void *p)
288f4aa9 1098{
9a864da4 1099 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1100 try {
5af86fdc
RG
1101 if (t_queryring)
1102 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1103
32015748 1104 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1105 EDNSOpts edo;
5164bac3 1106 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1107 bool variableAnswer = dc->d_variable;
8e079f3a 1108 bool haveEDNS=false;
ca2526f5
NC
1109#ifdef NOD_ENABLED
1110 bool hasUDR = false;
1111#endif /* NOD_ENABLED */
f1db0de2
PL
1112 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1113 uint8_t ednsExtRCode = 0;
8e079f3a 1114 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1115 haveEDNS=true;
1116 if (edo.d_version != 0) {
1117 ednsExtRCode = ERCode::BADVERS;
1118 }
1119
32015748
RG
1120 if(!dc->d_tcp) {
1121 /* rfc6891 6.2.3:
1122 "Values lower than 512 MUST be treated as equal to 512."
1123 */
1124 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1125 }
5164bac3 1126 ednsOpts = edo.d_options;
3af35968 1127 maxanswersize -= 11; // EDNS header size
b40562da 1128
1f691b94
PL
1129 for (const auto& o : edo.d_options) {
1130 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1131 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1132 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1133 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1134 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1135 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1136 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1137 variableAnswer = true; // Can't packetcache an answer with NSID
1138 // Option Code and Option Length are both 2
1139 maxanswersize -= 2 + 2 + mode_server_id.size();
1140 }
b40562da
RG
1141 }
1142 }
10321a98 1143 }
b40562da
RG
1144 /* perhaps there was no EDNS or no ECS but by now we looked */
1145 dc->d_ecsParsed = true;
e325f20c 1146 vector<DNSRecord> ret;
ea634573 1147 vector<uint8_t> packet;
b23b8614 1148
ad42489c 1149 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1150 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1151 bool wantsRPZ(true);
1fbc6dc5 1152 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1153 bool logResponse = false;
aa7929a3 1154#ifdef HAVE_PROTOBUF
63341e8d 1155 if (checkProtobufExport(luaconfsLocal)) {
b773359c 1156 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1157 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1158 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
0bd2e252 1159 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1160 pbMessage->setServerIdentity(SyncRes::s_serverID);
d362f7c1 1161 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1162 }
1163#endif /* HAVE_PROTOBUF */
ad42489c 1164
b9fa43e0
OM
1165#ifdef HAVE_FSTRM
1166 checkFrameStreamExport(luaconfsLocal);
1167#endif
1168
3ddb9247 1169 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1170
1171 pw.getHeader()->aa=0;
1172 pw.getHeader()->ra=1;
c154c8a4 1173 pw.getHeader()->qr=1;
bb4bdbaf 1174 pw.getHeader()->tc=0;
ea634573 1175 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1176 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1177 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1178
70fb28d9
RG
1179 /* This is the lowest TTL seen in the records of the response,
1180 so we can't cache it for longer than this value.
1181 If we have a TTL cap, this value can't be larger than the
1182 cap no matter what. */
1183 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1184
1185 SyncRes sr(dc->d_now);
0c43f455 1186
2e921ec6 1187 bool DNSSECOK=false;
3457a2a0 1188 if(t_pdl) {
f26bf547 1189 sr.setLuaEngine(t_pdl);
3457a2a0 1190 }
9eec8c98 1191 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1192 sr.setDoDNSSEC(true);
9eec8c98
PL
1193
1194 // Does the requestor want DNSSEC records?
d6c335ab 1195 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1196 DNSSECOK=true;
1197 g_stats.dnssecQueries++;
1198 }
88c33dca
RG
1199 if (dc->d_mdp.d_header.cd) {
1200 /* Per rfc6840 section 5.9, "When processing a request with
1201 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1202 to return all response data, even data that has failed DNSSEC
1203 validation. */
1204 ++g_stats.dnssecCheckDisabledQueries;
1205 }
1206 if (dc->d_mdp.d_header.ad) {
1207 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1208 indicating that the requester understands and is interested in the
1209 value of the AD bit in the response. This allows a requester to
1210 indicate that it understands the AD bit without also requesting
1211 DNSSEC data via the DO bit. */
1212 ++g_stats.dnssecAuthenticDataQueries;
1213 }
9eec8c98
PL
1214 } else {
1215 // Ignore the client-set CD flag
1216 pw.getHeader()->cd=0;
5b9853c9 1217 }
0c43f455
RG
1218 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1219
4898a348 1220#ifdef HAVE_PROTOBUF
30ee601a 1221 sr.setInitialRequestId(dc->d_uuid);
b773359c 1222 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1223#endif
b9fa43e0
OM
1224#ifdef HAVE_FSTRM
1225 sr.setFrameStreamServers(t_frameStreamServers);
1226#endif
2fe3354d 1227 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1228
904d3219 1229 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1230 bool shouldNotValidate = false;
904d3219 1231
ef3b6cd7
RG
1232 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1233 int res = RCode::NoError;
1f1ca368 1234 DNSFilterEngine::Policy appliedPolicy;
6da513b2 1235 std::vector<DNSRecord> spoofed;
f1c7929a 1236 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1237 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1238 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1239 dq.tag = dc->d_tag;
1240 dq.discardedPolicies = &sr.d_discardedPolicies;
1241 dq.policyTags = &dc->d_policyTags;
1242 dq.appliedPolicy = &appliedPolicy;
1243 dq.currentRecords = &ret;
1244 dq.dh = &dc->d_mdp.d_header;
05c74122 1245 dq.data = dc->d_data;
67e31ebe
RG
1246#ifdef HAVE_PROTOBUF
1247 dq.requestorId = dc->d_requestorId;
590388d2 1248 dq.deviceId = dc->d_deviceId;
67e31ebe 1249#endif
ba21fcfe 1250
6cf96227
PL
1251 if(ednsExtRCode != 0) {
1252 goto sendit;
1253 }
1254
e661a20b 1255 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1256 pw.getHeader()->tc = 1;
1257 res = 0;
1258 variableAnswer = true;
e661a20b
PD
1259 goto sendit;
1260 }
1261
f26bf547 1262 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1263 sr.setLogMode(SyncRes::Store);
1264 tracedQuery=true;
1265 }
3ddb9247 1266
8f7473d7 1267
976ec823 1268 if(!g_quiet || tracedQuery) {
e6a9dde5 1269 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1270 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1271 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1272 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1273 }
e6a9dde5 1274 g_log<<endl;
976ec823 1275 }
c75a6a9e 1276
fededf47 1277 sr.setId(MT->getTid());
67828389 1278 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1279 sr.setCacheOnly();
1280
f26bf547
RG
1281 if (t_pdl) {
1282 t_pdl->prerpz(dq, res);
0a273054
RG
1283 }
1284
db486de5 1285 // Check if the query has a policy attached to it
0a273054 1286 if (wantsRPZ) {
5cc8371b 1287 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1288 }
644dd1da 1289
54be222b 1290 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1291 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1292
30ee601a 1293 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1294 if(wantsRPZ) {
1295 switch(appliedPolicy.d_kind) {
1296 case DNSFilterEngine::PolicyKind::NoAction:
1297 break;
1298 case DNSFilterEngine::PolicyKind::Drop:
1299 g_stats.policyDrops++;
7a25883a 1300 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1301 return;
1302 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1303 g_stats.policyResults[appliedPolicy.d_kind]++;
1304 res=RCode::NXDomain;
1305 goto haveAnswer;
1306 case DNSFilterEngine::PolicyKind::NODATA:
1307 g_stats.policyResults[appliedPolicy.d_kind]++;
1308 res=RCode::NoError;
db486de5 1309 goto haveAnswer;
b8470add
PL
1310 case DNSFilterEngine::PolicyKind::Custom:
1311 g_stats.policyResults[appliedPolicy.d_kind]++;
1312 res=RCode::NoError;
6da513b2
RG
1313 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1314 for (const auto& dr : spoofed) {
1315 ret.push_back(dr);
1316 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1317 }
b8470add
PL
1318 goto haveAnswer;
1319 case DNSFilterEngine::PolicyKind::Truncate:
1320 if(!dc->d_tcp) {
1321 g_stats.policyResults[appliedPolicy.d_kind]++;
1322 res=RCode::NoError;
1323 pw.getHeader()->tc=1;
1324 goto haveAnswer;
1325 }
1326 break;
1327 }
db486de5
PL
1328 }
1329
b8470add 1330 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1331 try {
1332 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1333 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1334 }
1335 catch(ImmediateServFailException &e) {
854d44e3 1336 if(g_logCommonErrors)
e6a9dde5 1337 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1338 res = RCode::ServFail;
1339 }
4485aa35 1340
1921a4c2
RG
1341 dq.validationState = sr.getValidationState();
1342
b8470add
PL
1343 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1344 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1345 appliedPolicy = sr.d_appliedPolicy;
1346 g_stats.policyResults[appliedPolicy.d_kind]++;
1347 switch(appliedPolicy.d_kind) {
1348 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1349 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1350 case DNSFilterEngine::PolicyKind::Drop:
1351 g_stats.policyDrops++;
b8470add
PL
1352 return;
1353 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1354 ret.clear();
1355 res=RCode::NXDomain;
1356 goto haveAnswer;
1357
1358 case DNSFilterEngine::PolicyKind::NODATA:
1359 ret.clear();
1360 res=RCode::NoError;
1361 goto haveAnswer;
1362
1363 case DNSFilterEngine::PolicyKind::Truncate:
1364 if(!dc->d_tcp) {
1365 ret.clear();
1366 res=RCode::NoError;
1367 pw.getHeader()->tc=1;
1368 goto haveAnswer;
1369 }
1370 break;
1371
1372 case DNSFilterEngine::PolicyKind::Custom:
1373 ret.clear();
1374 res=RCode::NoError;
6da513b2
RG
1375 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1376 for (const auto& dr : spoofed) {
1377 ret.push_back(dr);
1378 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1379 }
b8470add
PL
1380 goto haveAnswer;
1381 }
1382 }
1383
1384 if (wantsRPZ) {
1f1ca368 1385 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1386 }
db486de5 1387
f26bf547 1388 if(t_pdl) {
db486de5
PL
1389 if(res == RCode::NoError) {
1390 auto i=ret.cbegin();
1391 for(; i!= ret.cend(); ++i)
1392 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1393 break;
f26bf547 1394 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1395 shouldNotValidate = true;
1396
db486de5 1397 }
f26bf547 1398 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1399 shouldNotValidate = true;
db486de5 1400
f26bf547 1401 if(t_pdl->postresolve(dq, res))
3ca4e735 1402 shouldNotValidate = true;
db486de5
PL
1403 }
1404
b8470add
PL
1405 if (wantsRPZ) { //XXX This block is repeated, see above
1406 g_stats.policyResults[appliedPolicy.d_kind]++;
1407 switch(appliedPolicy.d_kind) {
1408 case DNSFilterEngine::PolicyKind::NoAction:
1409 break;
1410 case DNSFilterEngine::PolicyKind::Drop:
1411 g_stats.policyDrops++;
b8470add
PL
1412 return;
1413 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1414 ret.clear();
1415 res=RCode::NXDomain;
1416 goto haveAnswer;
1417
1418 case DNSFilterEngine::PolicyKind::NODATA:
1419 ret.clear();
1420 res=RCode::NoError;
1421 goto haveAnswer;
1422
1423 case DNSFilterEngine::PolicyKind::Truncate:
1424 if(!dc->d_tcp) {
1425 ret.clear();
1426 res=RCode::NoError;
1427 pw.getHeader()->tc=1;
1428 goto haveAnswer;
1429 }
1430 break;
1431
1432 case DNSFilterEngine::PolicyKind::Custom:
1433 ret.clear();
1434 res=RCode::NoError;
6da513b2
RG
1435 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1436 for (const auto& dr : spoofed) {
1437 ret.push_back(dr);
1438 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1439 }
b8470add
PL
1440 goto haveAnswer;
1441 }
644dd1da 1442 }
4485aa35 1443 }
644dd1da 1444 haveAnswer:;
3e8216c8 1445 if(res == PolicyDecision::DROP) {
e9c2ad3a 1446 g_stats.policyDrops++;
ae7e77ad 1447 return;
3ddb9247 1448 }
9cdfab64 1449 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1450 {
85ffbc53
PD
1451 string trace(sr.getTrace());
1452 if(!trace.empty()) {
1453 vector<string> lines;
1454 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1455 for(const string& line : lines) {
85ffbc53 1456 if(!line.empty())
e6a9dde5 1457 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1458 }
1459 }
1460 }
3ddb9247 1461
9cdfab64 1462 if(res == -1) {
0fe1d080
PD
1463 pw.getHeader()->rcode=RCode::ServFail;
1464 // no commit here, because no record
1465 g_stats.servFails++;
1466 }
288f4aa9 1467 else {
ea634573 1468 pw.getHeader()->rcode=res;
92011b8f 1469
f3fe4ae6 1470 // Does the validation mode or query demand validation?
0c43f455 1471 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1472 try {
f3fe4ae6 1473 if(sr.doLog()) {
e6a9dde5 1474 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1475 }
4d2be65d
RG
1476
1477 auto state = sr.getValidationState();
1478
b25cae9a 1479 if(state == Secure) {
2e921ec6 1480 if(sr.doLog()) {
e6a9dde5 1481 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1482 }
b25cae9a 1483
1484 // Is the query source interested in the value of the ad-bit?
885c8881 1485 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1486 pw.getHeader()->ad=1;
1487 }
1488 else if(state == Insecure) {
f3fe4ae6 1489 if(sr.doLog()) {
e6a9dde5 1490 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1491 }
b25cae9a 1492
1493 pw.getHeader()->ad=0;
f3fe4ae6 1494 }
b25cae9a 1495 else if(state == Bogus) {
66f2e6ad
KM
1496 if(t_bogusremotes)
1497 t_bogusremotes->push_back(dc->d_source);
1498 if(t_bogusqueryring)
1499 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1500 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1501 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1502 }
1503
1504 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1505 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1506 if(sr.doLog()) {
e6a9dde5 1507 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1508 }
1509
1510 pw.getHeader()->rcode=RCode::ServFail;
1511 goto sendit;
1512 } else {
1513 if(sr.doLog()) {
e6a9dde5 1514 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1515 }
1516 }
1517 }
1518 }
1519 catch(ImmediateServFailException &e) {
1520 if(g_logCommonErrors)
e6a9dde5 1521 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1522 pw.getHeader()->rcode=RCode::ServFail;
1523 goto sendit;
f3fe4ae6 1524 }
b3f0ed10 1525 }
1526
c154c8a4 1527 if(ret.size()) {
92476c8b 1528 orderAndShuffle(ret);
5cc8371b 1529 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1530 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1531 variableAnswer=true;
1532 }
8e079f3a 1533 }
0afa32d4
RG
1534
1535 bool needCommit = false;
8e079f3a 1536 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1537 if( ! DNSSECOK &&
1538 ( i->d_type == QType::NSEC3 ||
1539 (
1540 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1541 (
1542 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1543 i->d_place != DNSResourceRecord::ANSWER
1544 )
1545 )
1546 )
1547 ) {
2e921ec6 1548 continue;
3e80ebce
KM
1549 }
1550
70fb28d9 1551 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1552 needCommit = false;
1553 break;
1554 }
1555 needCommit = true;
1556
41c542ec
NC
1557#ifdef NOD_ENABLED
1558 bool udr = false;
1559 if (g_udrEnabled) {
1560 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1561 if (!hasUDR && udr)
1562 hasUDR = true;
41c542ec
NC
1563 }
1564#endif /* NOD ENABLED */
1565
aa7929a3 1566#ifdef HAVE_PROTOBUF
b773359c 1567 if (t_protobufServers) {
41c542ec
NC
1568#ifdef NOD_ENABLED
1569 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1570#else
0bd2e252 1571 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1572#endif /* NOD_ENABLED */
aa7929a3
RG
1573 }
1574#endif
ea634573 1575 }
0afa32d4 1576 if(needCommit)
8e079f3a 1577 pw.commit();
288f4aa9 1578 }
10321a98 1579 sendit:;
b3f0ed10 1580
a0ddd130 1581 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
9837850d 1582 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
5a7f99b4 1583 EDNSSubnetOpts eo;
1584 eo.source = dc->d_ednssubnet.source;
1585 ComboAddress sa;
1ef18cab 1586 sa.reset();
5a7f99b4 1587 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1588 eo.scope = Netmask(sa, 0);
1589
1590 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1591 }
1592
97c6d7e5
RG
1593 if (haveEDNS) {
1594 /* we try to add the EDNS OPT RR even for truncated answers,
1595 as rfc6891 states:
1596 "The minimal response MUST be the DNS header, question section, and an
1597 OPT record. This MUST also occur when a truncated response (using
1598 the DNS header's TC bit) is returned."
1599 */
9b60fb71 1600 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1601 pw.commit();
97c6d7e5
RG
1602 }
1603
79332bff 1604 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1605 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1606#ifdef NOD_ENABLED
1607 bool nod = false;
1608 if (g_nodEnabled) {
1609 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1610 nod = true;
1611 }
1612#endif /* NOD_ENABLED */
aa7929a3 1613#ifdef HAVE_PROTOBUF
b773359c 1614 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1615 pbMessage->setBytes(packet.size());
1616 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1617 if (appliedPolicy.d_name) {
d362f7c1
RG
1618 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1619 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1620 }
d362f7c1 1621 pbMessage->setPolicyTags(dc->d_policyTags);
c29d820c
RG
1622 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1623 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1624 }
1625 else {
1626 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1627 }
d362f7c1
RG
1628 pbMessage->setRequestorId(dq.requestorId);
1629 pbMessage->setDeviceId(dq.deviceId);
41c542ec
NC
1630#ifdef NOD_ENABLED
1631 if (g_nodEnabled) {
ca2526f5 1632 if (nod) {
41c542ec 1633 pbMessage->setNOD(true);
ca2526f5
NC
1634 pbMessage->addPolicyTag(g_nod_pbtag);
1635 }
1636 if (hasUDR) {
1637 pbMessage->addPolicyTag(g_udr_pbtag);
1638 }
41c542ec
NC
1639 }
1640#endif /* NOD_ENABLED */
b773359c 1641 protobufLogResponse(*pbMessage);
ac238ea7 1642#ifdef NOD_ENABLED
ca2526f5
NC
1643 if (g_nodEnabled) {
1644 pbMessage->setNOD(false);
1645 pbMessage->clearUDR();
1646 if (nod)
1647 pbMessage->removePolicyTag(g_nod_pbtag);
1648 if (hasUDR)
1649 pbMessage->removePolicyTag(g_udr_pbtag);
1650 }
ac238ea7 1651#endif /* NOD_ENABLED */
aa7929a3
RG
1652 }
1653#endif
ea634573 1654 if(!dc->d_tcp) {
b71b60ee 1655 struct msghdr msgh;
1656 struct iovec iov;
1657 char cbuf[256];
1658 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1659 msgh.msg_control=NULL;
1660
cbc03320 1661 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1662 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1663 }
cbc03320 1664 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1665 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1666
49dc532e 1667 if(variableAnswer || sr.wasVariable()) {
1ef18cab 1668 g_stats.variableResponses++;
49dc532e 1669 }
3762e821 1670 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1671 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1672 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1673 g_now.tv_sec,
76e2b9e3 1674 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1675 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1676 dq.validationState,
08b02366
RG
1677 dc->d_ecsBegin,
1678 dc->d_ecsEnd,
4b0bdd5f 1679 std::move(pbMessage));
1051f8a9 1680 }
3762e821 1681 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1682 }
9c495589
BH
1683 else {
1684 char buf[2];
ea634573
BH
1685 buf[0]=packet.size()/256;
1686 buf[1]=packet.size()%256;
feccc9fc 1687
c038218b 1688 Utility::iovec iov[2];
feccc9fc 1689
ea634573
BH
1690 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1691 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1692
dd079764 1693 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1694 bool hadError=true;
feccc9fc 1695
dd079764 1696 if(wret == 0)
e6a9dde5 1697 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1698 else if(wret < 0 )
e6a9dde5 1699 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1700 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1701 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1702 else
18af64a8 1703 hadError=false;
3ddb9247 1704
09e6702a 1705 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1706
09e6702a 1707 if(hadError) {
18af64a8 1708 // no need to remove us from FDM, we weren't there
c36bc97a 1709 dc->d_socket = -1;
09e6702a 1710 }
a6ae6414 1711 else {
fde296a3
RG
1712 dc->d_tcpConnection->queriesCount++;
1713 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1714 dc->d_socket = -1;
1715 }
1716 else {
1717 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1718 Utility::gettimeofday(&g_now, 0); // needs to be updated
27ae2e3c
RG
1719 struct timeval ttd = g_now;
1720 ttd.tv_sec += g_tcpTimeout;
1721
1722 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
fde296a3 1723 }
0e9d9ce2 1724 }
9c495589 1725 }
2c9119cd 1726 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1727 if(!g_quiet) {
e6a9dde5
PL
1728 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1729 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1730 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1731 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1732
1733 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1734 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1735 }
1736
e6a9dde5 1737 g_log<<endl;
2c9119cd 1738
c75a6a9e 1739 }
b23b8614 1740
f7b8cffa
RG
1741 if (sr.d_outqueries || sr.d_authzonequeries) {
1742 t_RC->cacheMisses++;
1743 }
1744 else {
1745 t_RC->cacheHits++;
1746 }
2c9119cd 1747
fe213470
BH
1748 if(spent < 0.001)
1749 g_stats.answers0_1++;
1750 else if(spent < 0.010)
1751 g_stats.answers1_10++;
1752 else if(spent < 0.1)
1753 g_stats.answers10_100++;
1754 else if(spent < 1.0)
1755 g_stats.answers100_1000++;
1756 else
1757 g_stats.answersSlow++;
1758
574af7ea 1759 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1760 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1761 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1762 // no worries, we do this for packet cache hits elsewhere
19178da9 1763
1764 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1765 if(ourtime < 1)
1766 g_stats.ourtime0_1++;
1767 else if(ourtime < 2)
1768 g_stats.ourtime1_2++;
1769 else if(ourtime < 4)
1770 g_stats.ourtime2_4++;
1771 else if(ourtime < 8)
1772 g_stats.ourtime4_8++;
1773 else if(ourtime < 16)
1774 g_stats.ourtime8_16++;
1775 else if(ourtime < 32)
1776 g_stats.ourtime16_32++;
1777 else {
1778 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1779 g_stats.ourtimeSlow++;
1780 }
042da1a1 1781 if(ourtime >= 0.0) {
1782 newLat=ourtime*1000; // usec
1783 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1784 }
c6d04bdc 1785 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1786 }
3f81d239 1787 catch(PDNSException &ae) {
e6a9dde5 1788 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1789 }
16ce7f18
JS
1790 catch(const MOADNSException &mde) {
1791 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1792 }
fdbf35ac 1793 catch(std::exception& e) {
e6a9dde5 1794 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1795
1796 // Luawrapper nests the exception from Lua, so we unnest it here
1797 try {
1798 std::rethrow_if_nested(e);
2010ac95 1799 } catch(const std::exception& ne) {
e6a9dde5 1800 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1801 } catch(...) {}
1802
e6a9dde5 1803 g_log<<endl;
c154c8a4 1804 }
288f4aa9 1805 catch(...) {
e6a9dde5 1806 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1807 }
3ddb9247 1808
ec6eacbc 1809 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1810}
1811
d187038c 1812static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1813{
2d733c0f 1814 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1815 if(processNum >= 0)
335da0ba 1816 sockname += "."+std::to_string(processNum);
677e2a46 1817 sockname+=".controlsocket";
41f7a068 1818 s_rcc.listen(sockname);
3ddb9247 1819
387de317
BH
1820 int sockowner = -1;
1821 int sockgroup = -1;
1822
1823 if (!::arg().isEmpty("socket-group"))
1824 sockgroup=::arg().asGid("socket-group");
1825 if (!::arg().isEmpty("socket-owner"))
1826 sockowner=::arg().asUid("socket-owner");
3ddb9247 1827
f838ad8d
BH
1828 if (sockgroup > -1 || sockowner > -1) {
1829 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1830 unixDie("Failed to chown control socket");
1831 }
1832 }
387de317
BH
1833
1834 // do mode change if socket-mode is given
1835 if(!::arg().isEmpty("socket-mode")) {
1836 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1837 if(chmod(sockname.c_str(), sockmode) < 0) {
1838 unixDie("Failed to chmod control socket");
1839 }
387de317 1840 }
1d5b3ce6
BH
1841}
1842
5cc8371b 1843static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1844 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1845 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1846{
59cb4a79 1847 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1848 const bool lookForECS = ednssubnet != nullptr;
1849 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1850 size_t questionLen = question.length();
1851 unsigned int consumed=0;
1852 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1853
1854 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1855 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1856 const uint16_t arcount = ntohs(dh->arcount);
1857
1858 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1859 if (question.at(pos) != 0) {
1860 /* not an OPT or a XPF, bye. */
1861 return;
1862 }
1863
1864 pos += 1;
1865 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1866 pos += sizeof(dnsrecordheader);
1867
1868 if (pos >= questionLen) {
1869 return;
1870 }
1871
02b47f43 1872 /* OPT root label (1) followed by type (2) */
5cc8371b 1873 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1874 if (!options) {
1875 char* ecsStart = nullptr;
1876 size_t ecsLen = 0;
5cc8371b
RG
1877 /* we need to pass the record len */
1878 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1879 if (res == 0 && ecsLen > 4) {
1880 EDNSSubnetOpts eso;
1881 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1882 *ednssubnet=eso;
5cc8371b 1883 foundECS = true;
00b8cadc
RG
1884 }
1885 }
1886 }
1887 else {
5cc8371b
RG
1888 /* we need to pass the record len */
1889 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1890 if (res == 0) {
1891 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1892 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1893 EDNSSubnetOpts eso;
29e6303a 1894 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1895 *ednssubnet=eso;
5cc8371b 1896 foundECS = true;
00b8cadc
RG
1897 }
1898 }
02b47f43
RG
1899 }
1900 }
1901 }
59cb4a79 1902 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1903 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1904 return;
1905 }
1906
1907 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1908 }
1909
1910 pos += ntohs(drh->d_clen);
02b47f43
RG
1911 }
1912}
1913
d187038c 1914static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1915{
cd989c87 1916 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1917
879b3f70 1918 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 1919 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1920 if(bytes==1)
667f7e60 1921 conn->state=TCPConnection::BYTE1;
3ddb9247 1922 if(bytes==2) {
a0aa4f64 1923 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1924 conn->data.resize(conn->qlen);
667f7e60
BH
1925 conn->bytesread=0;
1926 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1927 }
1928 if(!bytes || bytes < 0) {
bb4bdbaf 1929 t_fdm->removeReadFD(fd);
09e6702a
BH
1930 return;
1931 }
1932 }
667f7e60 1933 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 1934 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1935 if(bytes==1) {
667f7e60 1936 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1937 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1938 conn->data.resize(conn->qlen);
667f7e60 1939 conn->bytesread=0;
09e6702a
BH
1940 }
1941 if(!bytes || bytes < 0) {
1942 if(g_logCommonErrors)
e6a9dde5 1943 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1944 t_fdm->removeReadFD(fd);
09e6702a
BH
1945 return;
1946 }
1947 }
667f7e60 1948 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 1949 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
f9d67b41 1950 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1951 if(g_logCommonErrors) {
1952 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1953 }
bb4bdbaf 1954 t_fdm->removeReadFD(fd);
09e6702a
BH
1955 return;
1956 }
b841314c 1957 conn->bytesread+=(uint16_t)bytes;
667f7e60 1958 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1959 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1960
9a864da4 1961 std::unique_ptr<DNSComboWriter> dc;
09e6702a 1962 try {
9a864da4 1963 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 1964 }
16ce7f18 1965 catch(const MOADNSException &mde) {
3ddb9247 1966 g_stats.clientParseError++;
4957a608 1967 if(g_logCommonErrors)
e6a9dde5 1968 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1969 return;
09e6702a 1970 }
cd989c87
BH
1971 dc->d_tcpConnection = conn; // carry the torch
1972 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1973 dc->d_tcp=true;
5cc8371b
RG
1974 dc->setRemote(conn->d_remote);
1975 dc->setSource(conn->d_remote);
a6147cd2 1976 ComboAddress dest;
d38e2ba9 1977 dest.reset();
a6147cd2 1978 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1979 socklen_t len = dest.getSocklen();
1980 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1981 dc->setLocal(dest);
5cc8371b 1982 dc->setDestination(dest);
33dcceba
RG
1983 DNSName qname;
1984 uint16_t qtype=0;
1985 uint16_t qclass=0;
1986 bool needECS = false;
5cc8371b 1987 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1988 string requestorId;
590388d2 1989 string deviceId;
16bbc6e3 1990 bool logQuery = false;
aa7929a3 1991#ifdef HAVE_PROTOBUF
02b47f43 1992 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1993 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1994 needECS = true;
1995 }
b773359c 1996 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
b9fa43e0
OM
1997#endif /* HAVE_PROTOBUF */
1998
1999#ifdef HAVE_FSTRM
2000 checkFrameStreamExport(luaconfsLocal);
33dcceba
RG
2001#endif
2002
70fb28d9 2003 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
2004
2005 try {
29e6303a 2006 EDNSOptionViewMap ednsOptions;
5cc8371b 2007 bool xpfFound = false;
b40562da 2008 dc->d_ecsParsed = true;
5cc8371b 2009 dc->d_ecsFound = false;
2749c3fe 2010 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
2011 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2012 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 2013
70fb28d9 2014 if(t_pdl) {
33dcceba 2015 try {
70fb28d9 2016 if (t_pdl->d_gettag_ffi) {
f1c7929a 2017 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
2018 }
2019 else if (t_pdl->d_gettag) {
2020 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
2021 }
33dcceba 2022 }
70fb28d9 2023 catch(const std::exception& e) {
33dcceba 2024 if(g_logCommonErrors)
e6a9dde5 2025 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2026 }
2027 }
2028 }
70fb28d9 2029 catch(const std::exception& e)
33dcceba
RG
2030 {
2031 if(g_logCommonErrors)
e6a9dde5 2032 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2033 }
2034 }
f52177c3
RG
2035
2036 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2037
33dcceba 2038#ifdef HAVE_PROTOBUF
b773359c 2039 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 2040 dc->d_requestorId = requestorId;
590388d2 2041 dc->d_deviceId = deviceId;
d61aa945 2042 dc->d_uuid = getUniqueID();
4898a348 2043 }
02b47f43 2044
b773359c 2045 if(t_protobufServers) {
02b47f43 2046 try {
02b47f43 2047
845cbf4c 2048 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
b773359c 2049 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 2050 }
02b47f43
RG
2051 }
2052 catch(std::exception& e) {
2053 if(g_logCommonErrors)
e6a9dde5 2054 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
2055 }
2056 }
aa7929a3 2057#endif
5034517a
RG
2058 if(t_pdl) {
2059 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2060 if(!g_quiet)
2061 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2062 g_stats.policyDrops++;
2063 return;
2064 }
2065 }
2066
879b3f70 2067 if(dc->d_mdp.d_header.qr) {
048f5db6 2068 g_stats.ignoredCount++;
c0f9be19
RG
2069 if(g_logCommonErrors) {
2070 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2071 }
4957a608 2072 return;
879b3f70 2073 }
3abcdab2 2074 if(dc->d_mdp.d_header.opcode) {
048f5db6 2075 g_stats.ignoredCount++;
c0f9be19
RG
2076 if(g_logCommonErrors) {
2077 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2078 }
c0f9be19
RG
2079 return;
2080 }
2081 else if (dh->qdcount == 0) {
2082 g_stats.emptyQueriesCount++;
2083 if(g_logCommonErrors) {
2084 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2085 }
3abcdab2
PD
2086 return;
2087 }
09e6702a 2088 else {
4957a608
BH
2089 ++g_stats.qcounter;
2090 ++g_stats.tcpqcounter;
9a864da4 2091 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
4957a608 2092 return;
09e6702a
BH
2093 }
2094 }
2095 }
2096}
2097
6dcd28c3 2098//! Handle new incoming TCP connection
d187038c 2099static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2100{
37d3f960 2101 ComboAddress addr;
09e6702a 2102 socklen_t addrlen=sizeof(addr);
a683e8bd 2103 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2104 if(newsock>=0) {
85c32340
BH
2105 if(MT->numProcesses() > g_maxMThreads) {
2106 g_stats.overCapacityDrops++;
a7b68ae7
RG
2107 try {
2108 closesocket(newsock);
2109 }
2110 catch(const PDNSException& e) {
e6a9dde5 2111 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2112 }
85c32340
BH
2113 return;
2114 }
2115
92011b8f 2116 if(t_remotes)
2117 t_remotes->push_back(addr);
49a699c4 2118 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 2119 if(!g_quiet)
e6a9dde5 2120 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2121
09e6702a 2122 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2123 try {
2124 closesocket(newsock);
2125 }
2126 catch(const PDNSException& e) {
e6a9dde5 2127 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2128 }
09e6702a
BH
2129 return;
2130 }
bd0289fc 2131 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2132 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2133 try {
2134 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2135 }
2136 catch(const PDNSException& e) {
e6a9dde5 2137 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2138 }
09e6702a
BH
2139 return;
2140 }
3ddb9247 2141
3897b9e1 2142 setNonBlocking(newsock);
f26bf547 2143 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 2144 tc->state=TCPConnection::BYTE0;
3ddb9247 2145
27ae2e3c
RG
2146 struct timeval ttd;
2147 Utility::gettimeofday(&ttd, 0);
2148 ttd.tv_sec += g_tcpTimeout;
c038218b 2149
27ae2e3c 2150 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
09e6702a
BH
2151 }
2152}
3ddb9247 2153
d187038c 2154static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 2155{
183eb877 2156 gettimeofday(&g_now, 0);
c29d820c
RG
2157 if (tv.tv_sec) {
2158 struct timeval diff = g_now - tv;
2159 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2160
c29d820c
RG
2161 if(delta > 1000.0) {
2162 g_stats.tooOldDrops++;
2163 return nullptr;
2164 }
b71b60ee 2165 }
2166
1bc3c142 2167 ++g_stats.qcounter;
d7f10541
BH
2168 if(fromaddr.sin4.sin_family==AF_INET6)
2169 g_stats.ipv6qcounter++;
1bc3c142
BH
2170
2171 string response;
93f0da94 2172 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2173 unsigned int ctag=0;
f57486f1 2174 uint32_t qhash = 0;
12aff2e5 2175 bool needECS = false;
5cc8371b 2176 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 2177 std::vector<std::string> policyTags;
5fd2577f 2178 LuaContext::LuaObject data;
5cc8371b
RG
2179 ComboAddress source = fromaddr;
2180 ComboAddress destination = destaddr;
67e31ebe 2181 string requestorId;
590388d2 2182 string deviceId;
16bbc6e3 2183 bool logQuery = false;
12aff2e5 2184#ifdef HAVE_PROTOBUF
02b47f43 2185 boost::uuids::uuid uniqueId;
02b47f43 2186 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2187 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2188 uniqueId = getUniqueID();
02b47f43 2189 needECS = true;
63341e8d 2190 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2191 uniqueId = getUniqueID();
02b47f43 2192 }
b773359c
RG
2193 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2194 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2195#endif
2196#ifdef HAVE_FSTRM
2197 checkFrameStreamExport(luaconfsLocal);
12aff2e5 2198#endif
b40562da
RG
2199 EDNSSubnetOpts ednssubnet;
2200 bool ecsFound = false;
2201 bool ecsParsed = false;
08b02366
RG
2202 uint16_t ecsBegin = 0;
2203 uint16_t ecsEnd = 0;
70fb28d9
RG
2204 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2205 bool variable = false;
1bc3c142 2206 try {
02b47f43
RG
2207 DNSName qname;
2208 uint16_t qtype=0;
2209 uint16_t qclass=0;
1bc3c142 2210 uint32_t age;
c15ff3df 2211 bool qnameParsed=false;
8f7473d7 2212#ifdef MALLOC_TRACE
2213 /*
2214 static uint64_t last=0;
2215 if(!last)
2216 g_mtracer->clearAllocators();
2217 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2218 last=g_mtracer->getAllocs();
2219 cout<<g_mtracer->topAllocatorsString()<<endl;
2220 g_mtracer->clearAllocators();
2221 */
2222#endif
55a1378f 2223
70fb28d9 2224 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2225 try {
29e6303a 2226 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2227 bool xpfFound = false;
2228
2229 ecsFound = false;
2230
2231 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2232 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2233 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2234
c15ff3df
RG
2235 qnameParsed = true;
2236 ecsParsed = true;
12aff2e5 2237
70fb28d9 2238 if(t_pdl) {
12aff2e5 2239 try {
70fb28d9 2240 if (t_pdl->d_gettag_ffi) {
f1c7929a 2241 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
70fb28d9
RG
2242 }
2243 else if (t_pdl->d_gettag) {
2244 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2245 }
12aff2e5 2246 }
70fb28d9 2247 catch(const std::exception& e) {
12aff2e5 2248 if(g_logCommonErrors)
e6a9dde5 2249 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2250 }
8ea8c302 2251 }
b2eacd67 2252 }
70fb28d9 2253 catch(const std::exception& e)
b2eacd67 2254 {
2255 if(g_logCommonErrors)
e6a9dde5 2256 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2257 }
12ce523e 2258 }
3ddb9247 2259
02b47f43 2260 bool cacheHit = false;
1fbc6dc5 2261 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2262#ifdef HAVE_PROTOBUF
b773359c 2263 if (t_protobufServers) {
d362f7c1 2264 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2265 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2266 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
b773359c 2267 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
b790ef3d 2268 }
d9d3f9c1
RG
2269 }
2270#endif /* HAVE_PROTOBUF */
02b47f43 2271
70fb28d9
RG
2272 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2273 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2274 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2275 vState valState;
c15ff3df 2276 if (qnameParsed) {
08b02366 2277 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2278 }
2279 else {
08b02366 2280 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2281 }
2282
d9d3f9c1 2283 if (cacheHit) {
8467ec26
KM
2284 if(valState == Bogus) {
2285 if(t_bogusremotes)
2286 t_bogusremotes->push_back(source);
2287 if(t_bogusqueryring)
2288 t_bogusqueryring->push_back(make_pair(qname, qtype));
2289 }
2290
d9d3f9c1 2291#ifdef HAVE_PROTOBUF
b773359c 2292 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2293 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2294 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2295 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2296 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
c29d820c
RG
2297 if (g_useKernelTimestamp && tv.tv_sec) {
2298 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2299 }
2300 else {
2301 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2302 }
d362f7c1
RG
2303 pbMessage->setRequestorId(requestorId);
2304 pbMessage->setDeviceId(deviceId);
b773359c 2305 protobufLogResponse(*pbMessage);
02b47f43 2306 }
d9d3f9c1 2307#endif /* HAVE_PROTOBUF */
49a3500d 2308 if(!g_quiet)
e6a9dde5 2309 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2310
1bc3c142
BH
2311 g_stats.packetCacheHits++;
2312 SyncRes::s_queries++;
2313 ageDNSPacket(response, age);
b71b60ee 2314 struct msghdr msgh;
2315 struct iovec iov;
2316 char cbuf[256];
2317 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2318 msgh.msg_control=NULL;
2319
cbc03320 2320 if(g_fromtosockets.count(fd)) {
fbe2a2e0 2321 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 2322 }
cbc03320 2323 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2324 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2325
97bee66d 2326 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2327 struct dnsheader tmpdh;
2328 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2329 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2330 }
08f3f638 2331 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2332 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2333 return 0;
2334 }
3ddb9247 2335 }
1bc3c142 2336 catch(std::exception& e) {
e6a9dde5 2337 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2338 return 0;
2339 }
3ddb9247 2340
f26bf547 2341 if(t_pdl) {
5cc8371b 2342 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2343 if(!g_quiet)
e6a9dde5 2344 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2345 g_stats.policyDrops++;
2346 return 0;
2347 }
2348 }
2349
1bc3c142 2350 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2351 if(!g_quiet)
e6a9dde5 2352 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2353
1bc3c142
BH
2354 g_stats.overCapacityDrops++;
2355 return 0;
2356 }
3ddb9247 2357
9a864da4 2358 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
1bc3c142 2359 dc->setSocket(fd);
49a3500d 2360 dc->d_tag=ctag;
e9f63d47 2361 dc->d_qhash=qhash;
5cc8371b
RG
2362 dc->setRemote(fromaddr);
2363 dc->setSource(source);
b71b60ee 2364 dc->setLocal(destaddr);
5cc8371b 2365 dc->setDestination(destination);
1bc3c142 2366 dc->d_tcp=false;
b40562da
RG
2367 dc->d_ecsFound = ecsFound;
2368 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2369 dc->d_ecsBegin = ecsBegin;
2370 dc->d_ecsEnd = ecsEnd;
b40562da 2371 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2372 dc->d_ttlCap = ttlCap;
2373 dc->d_variable = variable;
aa7929a3 2374#ifdef HAVE_PROTOBUF
b773359c 2375 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2376 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2377 }
67e31ebe 2378 dc->d_requestorId = requestorId;
590388d2 2379 dc->d_deviceId = deviceId;
c29d820c 2380 dc->d_kernelTimestamp = tv;
aa7929a3
RG
2381#endif
2382
9a864da4 2383 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2384 return 0;
3ddb9247
PD
2385}
2386
b71b60ee 2387
d187038c 2388static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2389{
a683e8bd 2390 ssize_t len;
12c2f2b9 2391 static const size_t maxIncomingQuerySize = 512;
04896b99 2392 static thread_local std::string data;
5db529f8 2393 ComboAddress fromaddr;
b71b60ee 2394 struct msghdr msgh;
2395 struct iovec iov;
2396 char cbuf[256];
390f1dab 2397 bool firstQuery = true;
b71b60ee 2398
c0a00acd
RG
2399 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2400 data.resize(maxIncomingQuerySize);
2401 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2402 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2403
c0a00acd 2404 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2405
c0a00acd 2406 firstQuery = false;
390f1dab 2407
c0a00acd
RG
2408 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2409 g_stats.ignoredCount++;
2410 if (!g_quiet) {
2411 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2412 }
2413 return;
04896b99 2414 }
04896b99 2415
c0a00acd
RG
2416 if (msgh.msg_flags & MSG_TRUNC) {
2417 g_stats.truncatedDrops++;
2418 if (!g_quiet) {
2419 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2420 }
2421 return;
ba892c7f 2422 }
b23b8614 2423
c0a00acd
RG
2424 if(t_remotes) {
2425 t_remotes->push_back(fromaddr);
2426 }
81859ba5 2427
c0a00acd
RG
2428 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2429 if(!g_quiet) {
2430 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2431 }
3ddb9247 2432
c0a00acd
RG
2433 g_stats.unauthorizedUDP++;
2434 return;
5db529f8 2435 }
c0a00acd
RG
2436 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2437 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2438 if(!g_quiet) {
2439 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2440 }
2441
2442 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2443 return;
3abcdab2 2444 }
c0a00acd
RG
2445
2446 try {
2447 data.resize(static_cast<size_t>(len));
2448 dnsheader* dh=(dnsheader*)&data[0];
2449
2450 if(dh->qr) {
2451 g_stats.ignoredCount++;
2452 if(g_logCommonErrors) {
2453 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2454 }
2455 }
2456 else if(dh->opcode) {
2457 g_stats.ignoredCount++;
2458 if(g_logCommonErrors) {
2459 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2460 }
a6147cd2 2461 }
c0f9be19
RG
2462 else if (dh->qdcount == 0) {
2463 g_stats.emptyQueriesCount++;
2464 if(g_logCommonErrors) {
2465 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2466 }
2467 }
a6147cd2 2468 else {
c0a00acd
RG
2469 struct timeval tv={0,0};
2470 HarvestTimestamp(&msgh, &tv);
2471 ComboAddress dest;
2472 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2473 auto loc = rplookup(g_listenSocketsAddresses, fd);
2474 if(HarvestDestinationAddress(&msgh, &dest)) {
2475 // but.. need to get port too
2476 if(loc) {
2477 dest.sin4.sin_port = loc->sin4.sin_port;
2478 }
a6147cd2 2479 }
2480 else {
c0a00acd
RG
2481 if(loc) {
2482 dest = *loc;
2483 }
2484 else {
2485 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2486 socklen_t slen = dest.getSocklen();
2487 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2488 }
2489 }
2490
2491 if(g_weDistributeQueries) {
2492 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2493 }
2494 else {
144040be 2495 ++s_threadInfos[t_id].numberOfDistributedQueries;
c0a00acd 2496 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2497 }
2498 }
c0a00acd 2499 }
16ce7f18 2500 catch(const MOADNSException &mde) {
c0a00acd
RG
2501 g_stats.clientParseError++;
2502 if(g_logCommonErrors) {
2503 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2504 }
2505 }
2506 catch(const std::runtime_error& e) {
2507 g_stats.clientParseError++;
2508 if(g_logCommonErrors) {
2509 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2510 }
5db529f8
BH
2511 }
2512 }
c0a00acd
RG
2513 else {
2514 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2515 if(firstQuery && errno == EAGAIN) {
2516 g_stats.noPacketError++;
2517 }
390f1dab 2518
c0a00acd
RG
2519 break;
2520 }
ac0e821b 2521 }
5db529f8
BH
2522}
2523
adb6cd72 2524static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2525{
37d3f960 2526 int fd;
f28307ad 2527 vector<string>locals;
2e3d8a19 2528 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2529
f28307ad 2530 if(locals.empty())
3f81d239 2531 throw PDNSException("No local address specified");
3ddb9247 2532
f28307ad 2533 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2534 ServiceTuple st;
2535 st.port=::arg().asNum("local-port");
2536 parseService(*i, st);
3ddb9247 2537
32252594
BH
2538 ComboAddress sin;
2539
d38e2ba9 2540 sin.reset();
37d3f960 2541 sin.sin4.sin_family = AF_INET;
32252594 2542 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2543 sin.sin6.sin6_family = AF_INET6;
f71bc087 2544 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2545 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2546 }
2547
2548 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2549 if(fd<0)
3f81d239 2550 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2551
3897b9e1 2552 setCloseOnExec(fd);
a903b39c 2553
f28307ad 2554 int tmp=1;
810ff705 2555 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2556 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2557 exit(1);
f28307ad 2558 }
0dfa94ab 2559 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2560 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2561 }
2562
c8ddb7c2 2563#ifdef TCP_DEFER_ACCEPT
38ac0821 2564 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2565 if(i==locals.begin())
377602e3 2566 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2567 }
2568#endif
2569
fec7dd5a
SS
2570 if( ::arg().mustDo("non-local-bind") )
2571 Utility::setBindAny(AF_INET, fd);
2572
2332f42d 2573#ifdef SO_REUSEPORT
810ff705
RG
2574 if(g_reusePort) {
2575 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2576 throw PDNSException("SO_REUSEPORT: "+stringerror());
2577 }
2578#endif
2579
0735b17e
RG
2580 if (::arg().asNum("tcp-fast-open") > 0) {
2581#ifdef TCP_FASTOPEN
2582 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2583 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2584 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2585 }
2586#else
e6a9dde5 2587 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2588#endif
2589 }
2590
32252594 2591 sin.sin4.sin_port = htons(st.port);
a683e8bd 2592 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2593 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2594 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2595
3897b9e1 2596 setNonBlocking(fd);
49a699c4 2597 setSocketSendBuffer(fd, 65000);
37d3f960 2598 listen(fd, 128);
b243ca3b 2599 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2600 tcpSockets.insert(fd);
2601
84433b79 2602 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2603 // - fd is not that which we know here, but returned from accept()
3ddb9247 2604 if(sin.sin4.sin_family == AF_INET)
377602e3 2605 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2606 else
377602e3 2607 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2608 }
9c495589
BH
2609}
2610
b243ca3b 2611static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2612{
fec7dd5a 2613 int one=1;
f28307ad 2614 vector<string>locals;
2e3d8a19 2615 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2616
f28307ad 2617 if(locals.empty())
3f81d239 2618 throw PDNSException("No local address specified");
3ddb9247 2619
f28307ad 2620 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2621 ServiceTuple st;
2622 st.port=::arg().asNum("local-port");
2623 parseService(*i, st);
2624
37d3f960 2625 ComboAddress sin;
996c89cc 2626
d38e2ba9 2627 sin.reset();
37d3f960 2628 sin.sin4.sin_family = AF_INET;
32252594 2629 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2630 sin.sin6.sin6_family = AF_INET6;
f71bc087 2631 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2632 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2633 }
3ddb9247 2634
bb4bdbaf 2635 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2636 if(fd < 0) {
3f81d239 2637 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2638 }
915b0c39 2639 if (!setSocketTimestamps(fd))
e6a9dde5 2640 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2641
b71b60ee 2642 if(IsAnyAddress(sin)) {
cbc03320 2643 if(sin.sin4.sin_family == AF_INET)
2644 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2645 g_fromtosockets.insert(fd);
757d3179 2646#ifdef IPV6_RECVPKTINFO
cbc03320 2647 if(sin.sin4.sin_family == AF_INET6)
2648 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2649 g_fromtosockets.insert(fd);
757d3179 2650#endif
0dfa94ab 2651 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2652 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2653 }
b71b60ee 2654 }
fec7dd5a
SS
2655 if( ::arg().mustDo("non-local-bind") )
2656 Utility::setBindAny(AF_INET6, fd);
2657
3897b9e1 2658 setCloseOnExec(fd);
a903b39c 2659
4e9a20e6 2660 setSocketReceiveBuffer(fd, 250000);
32252594 2661 sin.sin4.sin_port = htons(st.port);
37d3f960 2662
2332f42d 2663
2573d4a6 2664#ifdef SO_REUSEPORT
810ff705 2665 if(g_reusePort) {
2332f42d 2666 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2667 throw PDNSException("SO_REUSEPORT: "+stringerror());
2668 }
2669#endif
90f9fbc0
RG
2670
2671 if (sin.isIPv4()) {
2672 try {
2673 setSocketIgnorePMTU(fd);
2674 }
2675 catch(const std::exception& e) {
2676 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2677 }
2678 }
2679
2680 socklen_t socklen=sin.getSocklen();
3ddb9247 2681 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2682 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2683
3897b9e1 2684 setNonBlocking(fd);
c2136bf0 2685
b243ca3b 2686 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2687 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2688 if(sin.sin4.sin_family == AF_INET)
377602e3 2689 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2690 else
377602e3 2691 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2692 }
c836dc19 2693}
caa6eefa 2694
d187038c 2695static void daemonize(void)
c836dc19
BH
2696{
2697 if(fork())
2698 exit(0); // bye bye
3ddb9247
PD
2699
2700 setsid();
c836dc19 2701
27a5ead5 2702 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2703 if(i < 0)
e6a9dde5 2704 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2705 else {
2706 dup2(i,0); /* stdin */
2707 dup2(i,1); /* stderr */
2708 dup2(i,2); /* stderr */
2709 close(i);
2710 }
288f4aa9 2711}
caa6eefa 2712
d187038c 2713static void usr1Handler(int)
c75a6a9e
BH
2714{
2715 statsWanted=true;
2716}
ae1b2e98 2717
d187038c 2718static void usr2Handler(int)
9170fbaf 2719{
f1f34cc2 2720 g_quiet= !g_quiet;
2721 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2722 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2723}
2724
d187038c 2725static void doStats(void)
c75a6a9e 2726{
16beeaa4
BH
2727 static time_t lastOutputTime;
2728 static uint64_t lastQueryCount;
d299d4f5 2729
2730 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2731 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2732
d299d4f5 2733 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2734 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2735 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2736 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2737 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2738
e6a9dde5 2739 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2740 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2741 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2742 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2743 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2744 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2745 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2746 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2747
e6a9dde5 2748 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2749 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2750
e6a9dde5 2751 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2752 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2753
144040be
RG
2754 size_t idx = 0;
2755 for (const auto& threadInfo : s_threadInfos) {
2756 if(threadInfo.isWorker) {
ad9fc3dc 2757 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
144040be
RG
2758 ++idx;
2759 }
2760 }
2761
16beeaa4
BH
2762 time_t now = time(0);
2763 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2764 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2765 }
2766 lastOutputTime = now;
2767 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2768 }
3ddb9247 2769 else if(statsWanted)
e6a9dde5 2770 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2771
c75a6a9e
BH
2772 statsWanted=false;
2773}
c836dc19 2774
29f0b1ce 2775static void houseKeeping(void *)
c836dc19 2776{
e4ae55e5 2777 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
3337c2f7
RG
2778 static thread_local int cleanCounter=0;
2779 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
2780 auto luaconfsLocal = g_luaconfs.getLocal();
2781
2782 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2783 // Loading the Lua config file already "refreshed" the TAs
2784 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2785 }
2786
cc59bce6 2787 try {
6b0d90ea 2788 if(s_running) {
cc59bce6 2789 return;
6b0d90ea 2790 }
cc59bce6 2791 s_running=true;
3ddb9247 2792
cc59bce6 2793 struct timeval now;
2794 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2795
2796 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
a6f7f5fe 2797 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2798 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2799
a6f7f5fe 2800 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2801
cc59bce6 2802 if(!((cleanCounter++)%40)) { // this is a full scan!
2803 time_t limit=now.tv_sec-300;
a712cb56 2804 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2805 }
2806 last_prune=time(0);
d67620e4 2807 }
3ddb9247 2808
cc59bce6 2809 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2810 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2811 if (!res)
2812 last_rootupdate=now.tv_sec;
cc59bce6 2813 }
3ddb9247 2814
b243ca3b 2815 if(isHandlerThread()) {
3ddb9247 2816
cc59bce6 2817 if(now.tv_sec - last_secpoll >= 3600) {
2818 try {
2819 doSecPoll(&last_secpoll);
2820 }
581d4ea3 2821 catch(std::exception& e)
2822 {
e6a9dde5 2823 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2824 }
47e9b74f 2825 catch(PDNSException& e)
2826 {
e6a9dde5 2827 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2828 }
d0992a65
CH
2829 catch(ImmediateServFailException &e)
2830 {
e6a9dde5 2831 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2832 }
47e9b74f 2833 catch(...)
2834 {
e6a9dde5 2835 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2836 }
18b73338 2837 }
e4ae55e5
PL
2838
2839 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2840 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2841 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2842 try {
2843 map<DNSName, dsmap_t> dsAnchors;
2844 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2845 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2846 lci.dsAnchors = dsAnchors;
2847 });
2848 }
2849 last_trustAnchorUpdate = now.tv_sec;
2850 } catch (const PDNSException &pe) {
2851 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2852 }
2853 }
d67620e4 2854 }
6b0d90ea 2855 s_running=false;
d67620e4 2856 }
cc59bce6 2857 catch(PDNSException& ae)
2858 {
2859 s_running=false;
e6a9dde5 2860 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2861 throw;
2862 }
779828c4 2863}
d6d5dea7 2864
d187038c 2865static void makeThreadPipes()
49a699c4 2866{
ee271fc4
RG
2867 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
2868 if (pipeBufferSize > 0) {
2869 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
2870 }
2871
b243ca3b
RG
2872 /* thread 0 is the handler / SNMP, we start at 1 */
2873 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2874 auto& threadInfos = s_threadInfos.at(n);
2875
49a699c4
BH
2876 int fd[2];
2877 if(pipe(fd) < 0)
2878 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2879
b243ca3b
RG
2880 threadInfos.pipes.readToThread = fd[0];
2881 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2882
49a699c4
BH
2883 if(pipe(fd) < 0)
2884 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2885
2886 threadInfos.pipes.readFromThread = fd[0];
2887 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2888
cf8cda18
RG
2889 if(pipe(fd) < 0)
2890 unixDie("Creating pipe for inter-thread communications");
d10307c5 2891
b243ca3b
RG
2892 threadInfos.pipes.readQueriesToThread = fd[0];
2893 threadInfos.pipes.writeQueriesToThread = fd[1];
2894
ee271fc4
RG
2895 if (pipeBufferSize > 0) {
2896 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
2897 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(errno)<<endl;
2898 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
2899 if (existingSize > 0) {
2900 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
2901 }
2902 }
2903 }
2904
b243ca3b 2905 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2906 unixDie("Making pipe for inter-thread communications non-blocking");
2907 }
49a699c4
BH
2908 }
2909}
2910
00c9b8c1
BH
2911struct ThreadMSG
2912{
2913 pipefunc_t func;
2914 bool wantAnswer;
2915};
2916
b4e76a18 2917void broadcastFunction(const pipefunc_t& func)
49a699c4 2918{
b243ca3b
RG
2919 /* This function might be called by the worker with t_id 0 during startup
2920 for the initialization of ACLs and domain maps. After that it should only
2921 be called by the handler. */
d77abca1 2922
b243ca3b
RG
2923 if (s_threadInfos.empty() && isHandlerThread()) {
2924 /* the handler and distributors will call themselves below, but
2925 during startup we get called while s_threadInfos has not been
2926 populated yet to update the ACL or domain maps, so we need to
2927 handle that case.
2928 */
2929 func();
2930 }
b4e76a18 2931
b243ca3b
RG
2932 unsigned int n = 0;
2933 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2934 if(n++ == t_id) {
b4e76a18 2935 func(); // don't write to ourselves!
49a699c4
BH
2936 continue;
2937 }
3ddb9247 2938
00c9b8c1
BH
2939 ThreadMSG* tmsg = new ThreadMSG();
2940 tmsg->func = func;
2941 tmsg->wantAnswer = true;
b243ca3b 2942 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2943 delete tmsg;
b243ca3b 2944
49a699c4 2945 unixDie("write to thread pipe returned wrong size or error");
b841314c 2946 }
3ddb9247 2947
49467864 2948 string* resp = nullptr;
b243ca3b 2949 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2950 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2951
49a699c4 2952 if(resp) {
49a699c4 2953 delete resp;
49467864 2954 resp = nullptr;
49a699c4
BH
2955 }
2956 }
2957}
06ea9015 2958
592d7ade 2959static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 2960{
144040be 2961 auto& targetInfo = s_threadInfos[target];
b243ca3b
RG
2962 if(!targetInfo.isWorker) {
2963 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2964 exit(1);
00c9b8c1 2965 }
d77abca1 2966
b243ca3b 2967 const auto& tps = targetInfo.pipes;
3ddb9247 2968
cf8cda18
RG
2969 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2970 if (written > 0) {
2971 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2972 delete tmsg;
2973 unixDie("write to thread pipe returned wrong size or error");
2974 }
2975 }
2976 else {
2977 int error = errno;
cf8cda18 2978 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 2979 return false;
cf8cda18 2980 } else {
592d7ade 2981 delete tmsg;
17634427 2982 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2983 }
b841314c 2984 }
592d7ade 2985
144040be
RG
2986 ++targetInfo.numberOfDistributedQueries;
2987
592d7ade
RG
2988 return true;
2989}
2990
144040be
RG
2991static unsigned int getWorkerLoad(size_t workerIdx)
2992{
2993 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
2994 if (mt != nullptr) {
2995 return mt->numProcesses();
2996 }
2997 return 0;
2998}
2999
3000static unsigned int selectWorker(unsigned int hash)
3001{
3002 if (s_balancingFactor == 0) {
3003 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
3004 }
3005
3006 /* we start with one, representing the query we are currently handling */
3007 double currentLoad = 1;
3008 std::vector<unsigned int> load(g_numWorkerThreads);
3009 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3010 load[idx] = getWorkerLoad(idx);
3011 currentLoad += load[idx];
3012 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3013 }
3014
3015 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3016 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3017
3018 unsigned int worker = hash % g_numWorkerThreads;
1b9d2d46 3019 /* at least one server has to be at or below the average load */
596bf482
RG
3020 if (load[worker] > targetLoad) {
3021 ++g_stats.rebalancedQueries;
3022 do {
3023 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3024 worker = (worker + 1) % g_numWorkerThreads;
3025 }
3026 while(load[worker] > targetLoad);
144040be
RG
3027 }
3028
3029 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3030}
3031
592d7ade
RG
3032// This function is only called by the distributor threads, when pdns-distributes-queries is set
3033void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3034{
3035 if (!isDistributorThread()) {
3036 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3037 exit(1);
3038 }
3039
3040 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
144040be 3041 unsigned int target = selectWorker(hash);
592d7ade
RG
3042
3043 ThreadMSG* tmsg = new ThreadMSG();
3044 tmsg->func = func;
3045 tmsg->wantAnswer = false;
3046
3047 if (!trySendingQueryToWorker(target, tmsg)) {
3048 /* if this function failed but did not raise an exception, it means that the pipe
3049 was full, let's try another one */
3050 unsigned int newTarget = 0;
3051 do {
3052 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3053 } while (newTarget == target);
3054
3055 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3056 g_stats.queryPipeFullDrops++;
3057 delete tmsg;
3058 }
3059 }
00c9b8c1 3060}
3427fa8a 3061
d187038c 3062static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 3063{
f26bf547 3064 ThreadMSG* tmsg = nullptr;
3ddb9247 3065
cf8cda18 3066 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
3067 unixDie("read from thread pipe returned wrong size or error");
3068 }
3ddb9247 3069
2f22827a 3070 void *resp=0;
3071 try {
3072 resp = tmsg->func();
3073 }
3074 catch(std::exception& e) {
6d2010a8 3075 if(g_logCommonErrors)
e6a9dde5 3076 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3077 }
3078 catch(PDNSException& e) {
6d2010a8 3079 if(g_logCommonErrors)
e6a9dde5 3080 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3081 }
d7c676a5 3082 if(tmsg->wantAnswer) {
b243ca3b
RG
3083 const auto& threadInfo = s_threadInfos.at(t_id);
3084 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 3085 delete tmsg;
00c9b8c1 3086 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
3087 }
3088 }
3ddb9247 3089
00c9b8c1 3090 delete tmsg;
49a699c4 3091}
09e6702a 3092
13034931
BH
3093template<class T> void *voider(const boost::function<T*()>& func)
3094{
3095 return func();
3096}
3097
b3b5459d
BH
3098vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3099{
3100 a.insert(a.end(), b.begin(), b.end());
3101 return a;
3102}
3103
/* Appends all (string, number) pairs of b to a and returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(std::end(a), std::begin(b), std::end(b));
  return a;
}
3109
3ddb9247
PD
3110vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3111{
3112 a.insert(a.end(), b.begin(), b.end());
3113 return a;
3114}
3115
92011b8f 3116
387b9ca6
RG
3117/*
3118 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
3119 reload the Lua script (not the Lua config) or change the current trace regex,
3120 and by the SNMP thread to gather metrics. */
b4e76a18 3121template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 3122{
b243ca3b 3123 if (!isHandlerThread()) {
788eeb4c 3124 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 3125 exit(1);
d77abca1
RG
3126 }
3127
b243ca3b 3128 unsigned int n = 0;
3427fa8a 3129 T ret=T();
b243ca3b
RG
3130 for (const auto& threadInfo : s_threadInfos) {
3131 if (n++ == t_id) {
3132 continue;
3133 }
3134
3135 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
3136 ThreadMSG* tmsg = new ThreadMSG();
3137 tmsg->func = boost::bind(voider<T>, func);
3138 tmsg->wantAnswer = true;
3ddb9247 3139
b841314c
RG
3140 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3141 delete tmsg;
3427fa8a 3142 unixDie("write to thread pipe returned wrong size or error");
b841314c 3143 }
3ddb9247 3144
49467864 3145 T* resp = nullptr;
3427fa8a
BH
3146 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3147 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3148
3427fa8a 3149 if(resp) {
3427fa8a
BH
3150 ret += *resp;
3151 delete resp;
49467864 3152 resp = nullptr;
3427fa8a
BH
3153 }
3154 }
3155 return ret;
3156}
3157
b4e76a18
RG
3158template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3159template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3160template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3161template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
5ac6d761 3162template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3427fa8a 3163
d187038c 3164static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3165{
fbfc1809
RG
3166 try {
3167 string remote;
3168 string msg=s_rcc.recv(&remote);
3169 RecursorControlParser rcp;
3170 RecursorControlParser::func_t* command;
3ddb9247 3171
fbfc1809 3172 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0 3173
fbfc1809
RG
3174 // If we are inside a chroot, we need to strip
3175 if (!arg()["chroot"].empty()) {
3176 size_t len = arg()["chroot"].length();
3177 remote = remote.substr(len);
3178 }
f0f3f0b0 3179
ab5c053d
BH
3180 s_rcc.send(answer, &remote);
3181 command();
3182 }
fbfc1809 3183 catch(const std::exception& e) {
e6a9dde5 3184 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3185 }
fbfc1809 3186 catch(const PDNSException& ae) {
e6a9dde5 3187 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3188 }
09e6702a
BH
3189}
3190
d187038c 3191static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3192{
0b18b22e 3193 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 3194 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3195
667f7e60 3196 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3197
a683e8bd 3198 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3199 if(ret > 0) {
667f7e60 3200 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3201 pident->inNeeded-=(size_t)ret;
825fa717 3202 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3203 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3204 PacketID pid=*pident;
3205 string msg=pident->inMSG;
3ddb9247 3206
bb4bdbaf 3207 t_fdm->removeReadFD(fd);
3ddb9247 3208 MT->sendEvent(pid, &msg);
09e6702a
BH
3209 }
3210 else {
667f7e60 3211 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3212 }
3213 }
3214 else {
667f7e60 3215 PacketID tmp=*pident;
bb4bdbaf 3216 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3217 string empty;
3218 MT->sendEvent(tmp, &empty); // this conveys error status
3219 }
3220}
3221
d187038c 3222static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3223{
0b18b22e 3224 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3225 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3226 if(ret > 0) {
a683e8bd 3227 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3228 if(pid->outPos==pid->outMSG.size()) {
3229 PacketID tmp=*pid;
bb4bdbaf 3230 t_fdm->removeWriteFD(fd);
09e6702a
BH
3231 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3232 }
3233 }
3234 else { // error or EOF
667f7e60 3235 PacketID tmp(*pid);
bb4bdbaf 3236 t_fdm->removeWriteFD(fd);
09e6702a 3237 string sent;
998a4334 3238 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3239 }
3240}
3241
34801ab1 3242// resend event to everybody chained onto it
d187038c 3243static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3244{
3245 if(iter->key.chain.empty())
3246 return;
e27e91a8 3247 // cerr<<"doResends called!\n";
34801ab1
BH
3248 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3249 resend.fd=-1;
3250 resend.id=*i;
e27e91a8 3251 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3252
34801ab1
BH
3253 MT->sendEvent(resend, &content);
3254 g_stats.chainResends++;
34801ab1
BH
3255 }
3256}
3257
d187038c 3258static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3259{
600fc20b 3260 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3261 ssize_t len;
fae8fe07
RG
3262 std::string packet;
3263 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3264 ComboAddress fromaddr;
09e6702a
BH
3265 socklen_t addrlen=sizeof(fromaddr);
3266
fae8fe07 3267 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3268
a683e8bd 3269 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3270 if(len < 0)
996c89cc 3271 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3272 else {
3ddb9247 3273 g_stats.serverParseError++;
09e6702a 3274 if(g_logCommonErrors)
e6a9dde5 3275 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3276 ": packet smaller than DNS header"<<endl;
998a4334 3277 }
34801ab1 3278
49a699c4 3279 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3280 string empty;
3281
3282 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3283 if(iter != MT->d_waiters.end())
34801ab1 3284 doResends(iter, pid, empty);
3ddb9247 3285
34801ab1 3286 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3287 return;
3ddb9247 3288 }
998a4334 3289
fae8fe07 3290 packet.resize(len);
998a4334 3291 dnsheader dh;
fae8fe07 3292 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3293
6da3b3ad
PD
3294 PacketID pident;
3295 pident.remote=fromaddr;
3296 pident.id=dh.id;
3297 pident.fd=fd;
34801ab1 3298
33a928af 3299 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3300 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3301 }
3302
3303 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3304 !dh.qr) { // one weird server
3305 pident.domain.clear();
3306 pident.type = 0;
3307 }
3308 else {
3309 try {
0b31e67e 3310 if(len > 12)
fae8fe07 3311 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3312 }
3313 catch(std::exception& e) {
3314 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3315 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3316 return;
34801ab1 3317 }
6da3b3ad 3318 }
34801ab1 3319
6da3b3ad
PD
3320 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3321 if(iter != MT->d_waiters.end()) {
3322 doResends(iter, pident, packet);
3323 }
c1da7976 3324
6da3b3ad 3325retryWithName:
4957a608 3326
6da3b3ad 3327 if(!MT->sendEvent(pident, &packet)) {
9ec48f21
RG
3328 /* we did not find a match for this response, something is wrong */
3329
6da3b3ad
PD
3330 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3331 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3332 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3333 pident.domain == mthread->key.domain) {
6da3b3ad 3334 mthread->key.nearMisses++;
998a4334 3335 }
6da3b3ad
PD
3336
3337 // be a bit paranoid here since we're weakening our matching
3ddb9247 3338 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3339 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3340 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3341 pident.domain = mthread->key.domain;
3342 pident.type = mthread->key.type;
3343 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3344 }
09e6702a 3345 }
6da3b3ad
PD
3346 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3347 if(g_logCommonErrors) {
e6a9dde5 3348 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3349 }
09e6702a 3350 }
6da3b3ad 3351 else if(fd >= 0) {
9ec48f21 3352 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
6da3b3ad
PD
3353 t_udpclientsocks->returnSocket(fd);
3354 }
09e6702a
BH
3355}
3356
1f4abb20
BH
3357FDMultiplexer* getMultiplexer()
3358{
3359 FDMultiplexer* ret;
f26bf547 3360 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3361 try {
f26bf547 3362 ret=i.second();
1f4abb20
BH
3363 return ret;
3364 }
98d0ee4a 3365 catch(FDMultiplexerException &fe) {
e6a9dde5 3366 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3367 }
3368 catch(...) {
e6a9dde5 3369 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3370 }
1f4abb20 3371 }
e6a9dde5 3372 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3373 exit(1);
3374}
3375
3ddb9247 3376
d187038c 3377static string* doReloadLuaScript()
4485aa35 3378{
674cf0f6 3379 string fname= ::arg()["lua-dns-script"];
4485aa35 3380 try {
674cf0f6 3381 if(fname.empty()) {
f26bf547 3382 t_pdl.reset();
377602e3 3383 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3384 return new string("unloaded\n");
4485aa35
BH
3385 }
3386 else {
9694e14f
AT
3387 t_pdl = std::make_shared<RecursorLua4>();
3388 t_pdl->loadFile(fname);
4485aa35
BH
3389 }
3390 }
fdbf35ac 3391 catch(std::exception& e) {
e6a9dde5 3392 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3393 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3394 }
3ddb9247 3395
e6a9dde5 3396 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3397 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3398}
3399
49a699c4
BH
3400string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3401{
3ddb9247 3402 if(begin != end)
49a699c4 3403 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3404
0f39c1a3 3405 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3406}
49a699c4 3407
d187038c 3408static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3409try
3410{
3411 if(newRegex.empty()) {
f26bf547 3412 t_traceRegex.reset();
77499b05
BH
3413 return new string("unset\n");
3414 }
3415 else {
f26bf547 3416 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3417 return new string("ok\n");
3418 }
3419}
3f81d239 3420catch(PDNSException& ae)
77499b05
BH
3421{
3422 return new string(ae.reason+"\n");
3423}
3424
3425string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3426{
3427 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3428}
3429
4e9a20e6 3430static void checkLinuxIPv6Limits()
3431{
3432#ifdef __linux__
3433 string line;
3434 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3435 int lim=std::stoi(line);
4e9a20e6 3436 if(lim < 16384) {
e6a9dde5 3437 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3438 }
3439 }
3440#endif
3441}
36849ff2 3442static void checkOrFixFDS()
4e9a20e6 3443{
c0063e60 3444 unsigned int availFDs=getFilenumLimit();
3445 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3446
3447 if(wantFDs > availFDs) {
067ad20e 3448 unsigned int hardlimit= getFilenumLimit(true);
3449 if(hardlimit >= wantFDs) {
c0063e60 3450 setFilenumLimit(wantFDs);
e6a9dde5 3451 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3452 }
3453 else {
067ad20e 3454 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3455 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3456 g_maxMThreads = newval;
067ad20e 3457 setFilenumLimit(hardlimit);
36849ff2 3458 }
3459 }
4e9a20e6 3460}
77499b05 3461
c390b2da 3462static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3463
f26bf547 3464static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3465{
3466 t_allowFrom = ng;
f26bf547 3467 return nullptr;
49a699c4
BH
3468}
3469
dbd23fc2
BH
// Command line as stored for later re-parsing (parseACLs() passes these to
// ::arg().preParse()). Zero until they are assigned.
int g_argc = 0;
char** g_argv = nullptr;
3472
18af64a8 3473void parseACLs()
f7c1d4e3 3474{
18af64a8 3475 static bool l_initialized;
3ddb9247 3476
49a699c4 3477 if(l_initialized) { // only reload configuration file on second call
18af64a8 3478 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3479 if(::arg()["config-name"]!="") {
3480 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3481 }
18af64a8 3482 cleanSlashes(configname);
3ddb9247
PD
3483
3484 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3485 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3486 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3487 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3488 ::arg().preParse(g_argc, g_argv, "include-dir");
3489
3490 // then process includes
3491 std::vector<std::string> extraConfigs;
242b90e1
AT
3492 ::arg().gatherIncludes(extraConfigs);
3493
1dc8f4d0 3494 for(const std::string& fn : extraConfigs) {
7e818521 3495 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3496 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3497 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3498 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3499 }
ca2c884c
AT
3500
3501 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3502 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3503 }
49a699c4 3504
f26bf547
RG
3505 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3506 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3507
2c95fc65
BH
3508 if(!::arg()["allow-from-file"].empty()) {
3509 string line;
2c95fc65
BH
3510 ifstream ifs(::arg()["allow-from-file"].c_str());
3511 if(!ifs) {
9c61b9d0 3512 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3513 }
3514
3515 string::size_type pos;
3516 while(getline(ifs,line)) {
3517 pos=line.find('#');
3518 if(pos!=string::npos)
3519 line.resize(pos);
3520 trim(line);
3521 if(line.empty())
3522 continue;
3523
18af64a8 3524 allowFrom->addMask(line);
2c95fc65 3525 }
e6a9dde5 3526 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3527 }
3528 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3529 vector<string> ips;
3530 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3531
e6a9dde5 3532 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3533 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3534 allowFrom->addMask(*i);
f7c1d4e3 3535 if(i!=ips.begin())
e6a9dde5
PL
3536 g_log<<Logger::Warning<<", ";
3537 g_log<<Logger::Warning<<*i;
f7c1d4e3 3538 }
e6a9dde5 3539 g_log<<Logger::Warning<<endl;
f7c1d4e3 3540 }
49a699c4 3541 else {
3ddb9247 3542 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3543 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3544 allowFrom = nullptr;
49a699c4 3545 }
3ddb9247 3546
49a699c4 3547 g_initialAllowFrom = allowFrom;
d7dae798 3548 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3549 oldAllowFrom = nullptr;
3ddb9247 3550
49a699c4 3551 l_initialized = true;
18af64a8
BH
3552}
3553
795215f2 3554
756e82cf 3555static void setupDelegationOnly()
3556{
3557 vector<string> parts;
3558 stringtok(parts, ::arg()["delegation-only"], ", \t");
3559 for(const auto& p : parts) {
9065eb05 3560 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3561 }
3562}
795215f2 3563
8fd25133
RG
3564static std::map<unsigned int, std::set<int> > parseCPUMap()
3565{
3566 std::map<unsigned int, std::set<int> > result;
3567
3568 const std::string value = ::arg()["cpu-map"];
3569
3570 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3571 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3572 return result;
3573 }
3574
3575 std::vector<std::string> parts;
3576
3577 stringtok(parts, value, " \t");
3578
3579 for(const auto& part : parts) {
3580 if (part.find('=') == string::npos)
3581 continue;
3582
3583 try {
3584 auto headers = splitField(part, '=');
3585 trim(headers.first);
3586 trim(headers.second);
3587
3588 unsigned int threadId = pdns_stou(headers.first);
3589 std::vector<std::string> cpus;
3590
3591 stringtok(cpus, headers.second, ",");
3592
3593 for(const auto& cpu : cpus) {
3594 int cpuId = std::stoi(cpu);
3595
3596 result[threadId].insert(cpuId);
3597 }
3598 }
3599 catch(const std::exception& e) {
e6a9dde5 3600 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3601 }
3602 }
3603
3604 return result;
3605}
3606
3607static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3608{
3609 const auto& cpuMapping = cpusMap.find(n);
3610 if (cpuMapping != cpusMap.cend()) {
3611 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3612 if (rc == 0) {
e6a9dde5 3613 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3614 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3615 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3616 }
e6a9dde5 3617 g_log<<Logger::Info<<endl;
8fd25133
RG
3618 }
3619 else {
e6a9dde5 3620 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3621 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3622 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3623 }
e6a9dde5 3624 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3625 }
3626 }
3627}
3628
af1377b7
NC
3629#ifdef NOD_ENABLED
3630static void setupNODThread()
3631{
3632 if (g_nodEnabled) {
b78727c6
NC
3633 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3634 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3635 try {
3636 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3637 }
3638 catch (const PDNSException& e) {
3639 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3640 _exit(1);
3641 }
3642 if (!t_nodDBp->init()) {
3643 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3644 _exit(1);
3645 }
41c542ec 3646 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 3647 t.detach();
ca2526f5 3648 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
3649 }
3650 if (g_udrEnabled) {
b78727c6
NC
3651 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3652 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3653 try {
3654 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3655 }
3656 catch (const PDNSException& e) {
3657 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3658 _exit(1);
3659 }
3660 if (!t_udrDBp->init()) {
3661 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3662 _exit(1);
3663 }
3664 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 3665 t.detach();
ca2526f5 3666 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
3667 }
3668}
3669
3670void parseNODWhitelist(const std::string& wlist)
3671{
3672 vector<string> parts;
3673 stringtok(parts, wlist, ",; ");
3674 for(const auto& a : parts) {
3675 g_nodDomainWL.add(DNSName(a));
3676 }
3677}
3678
3679static void setupNODGlobal()
3680{
3681 // Setup NOD subsystem
3682 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3683 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3684 g_nodLog = ::arg().mustDo("new-domain-log");
3685 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3686
3687 // Setup Unique DNS Response subsystem
3688 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3689 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3690}
3691#endif /* NOD_ENABLED */
3692
d187038c 3693static int serviceMain(int argc, char*argv[])
18af64a8 3694{
e6a9dde5
PL
3695 g_log.setName(s_programname);
3696 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3697 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3698
3699 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3700 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3701 if(val >= 0)
e6a9dde5 3702 g_log.setFacility(val);
18af64a8 3703 else
e6a9dde5 3704 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3705 }
3706
ba1a571d 3707 showProductVersion();
3afde9b2 3708
06ea9015 3709 g_disthashseed=dns_random(0xffffffff);
3710
b7ef5828
PL
3711 checkLinuxIPv6Limits();
3712 try {
3713 vector<string> addrs;
3714 if(!::arg()["query-local-address6"].empty()) {
3715 SyncRes::s_doIPv6=true;
e6a9dde5 3716 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3717
3718 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3719 for(const string& addr : addrs) {
3720 g_localQueryAddresses6.push_back(ComboAddress(addr));
3721 }
3722 }
3723 else {
e6a9dde5 3724 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3725 }
3726 addrs.clear();
3727 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3728 for(const string& addr : addrs) {
3729 g_localQueryAddresses4.push_back(ComboAddress(addr));
3730 }
3731 }
3732 catch(std::exception& e) {
e6a9dde5 3733 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3734 exit(99);
3735 }
3736
e48c6b8a
PL
3737 // keep this ABOVE loadRecursorLuaConfig!
3738 if(::arg()["dnssec"]=="off")
3739 g_dnssecmode=DNSSECMode::Off;
3740 else if(::arg()["dnssec"]=="process-no-validate")
3741 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3742 else if(::arg()["dnssec"]=="process")
3743 g_dnssecmode=DNSSECMode::Process;
3744 else if(::arg()["dnssec"]=="validate")
3745 g_dnssecmode=DNSSECMode::ValidateAll;
3746 else if(::arg()["dnssec"]=="log-fail")
3747 g_dnssecmode=DNSSECMode::ValidateForLog;
3748 else {
e6a9dde5 3749 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3750 exit(1);
3751 }
3752
9a3ab3e4
KM
3753 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3754 if (g_signatureInceptionSkew < 0) {
3755 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3756 exit(1);
3757 }
3758
e48c6b8a 3759 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3760 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3761
a6f7f5fe 3762 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3763 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3764
3765 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3766 try {
e6ec15bf 3767 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3768 }
3769 catch (PDNSException &e) {
e6a9dde5 3770 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3771 exit(1);
3772 }
ad42489c 3773
18af64a8 3774 parseACLs();
d6f3fcfa 3775 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 3776
eb5bae86 3777 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3778 vector<string> ips;
3779 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3780 ips.push_back("0.0.0.0");
3781 ips.push_back("::");
c36bc97a 3782
e6a9dde5 3783 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3784 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3785 SyncRes::addDontQuery(*i);
eb5bae86 3786 if(i!=ips.begin())
e6a9dde5
PL
3787 g_log<<Logger::Warning<<", ";
3788 g_log<<Logger::Warning<<*i;
eb5bae86 3789 }
e6a9dde5 3790 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3791 }
3792
f7c1d4e3 3793 g_quiet=::arg().mustDo("quiet");
3ddb9247 3794
b243ca3b 3795 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3796 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3797 if(g_weDistributeQueries) {
b243ca3b 3798 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3799 }
3ddb9247 3800
756e82cf 3801 setupDelegationOnly();
b33c2462 3802 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3803
77499b05
BH
3804 if(::arg()["trace"]=="fail") {
3805 SyncRes::setDefaultLogMode(SyncRes::Store);
3806 }
3807 else if(::arg().mustDo("trace")) {
3808 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3809 ::arg().set("quiet")="no";
3810 g_quiet=false;
3e9c6c0a 3811 g_dnssecLOG=true;
f7c1d4e3 3812 }
43a9b290
PL
3813 string myHostname = getHostname();
3814 if (myHostname == "UNKNOWN"){
3815 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3816 myHostname = "";
d0983bff 3817 }
3ddb9247 3818
aadceba8 3819 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
5cf4b2e7 3820 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
aadceba8 3821
1051f8a9
BH
3822 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3823
f7c1d4e3 3824 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
b9473937 3825 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
63637fd8 3826 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3827 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3828 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3829 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3830 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3831 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3832 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3833 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3834 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3835 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3836 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3837 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3838 if(SyncRes::s_serverID.empty()) {
d0983bff 3839 SyncRes::s_serverID = myHostname;
f7c1d4e3 3840 }
3ddb9247 3841
e9f9b8ec
RG
3842 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3843 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
c9783016 3844 SyncRes::clearECSStats();
fd8898fb 3845 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3846 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
ed9019c9 3847 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
e9f9b8ec 3848
116d1288
OM
3849 SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
3850
8a3a3822
RG
3851 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3852 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3853 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3854 }
3855 else {
3856 bool found = false;
3857 for (const auto& addr : g_localQueryAddresses4) {
3858 if (!IsAnyAddress(addr)) {
3859 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3860 found = true;
3861 break;
3862 }
3863 }
3864 if (!found) {
3865 for (const auto& addr : g_localQueryAddresses6) {
3866 if (!IsAnyAddress(addr)) {
3867 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3868 found = true;
3869 break;
3870 }
3871 }
3872 if (!found) {
3873 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3874 }
3875 }
3876 }
3877
2fe3354d
CH
3878 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3879 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3880 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3881
5cc8371b 3882 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3883 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3884
5b0ddd18 3885 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3886
49a699c4 3887 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3888
08f3f638 3889 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3890
f7c1d4e3 3891 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3892 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3893
3894 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3895 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3896
b3adda56
PD
3897 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3898
b243ca3b 3899 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3900 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3901 if (g_numWorkerThreads < 1) {
e6a9dde5 3902 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3903 g_numWorkerThreads = 1;
3904 }
3905
b243ca3b 3906 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3907 g_maxMThreads = ::arg().asNum("max-mthreads");
3908
00b8cadc
RG
3909 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3910
0ec489bf 3911 g_statisticsInterval = ::arg().asNum("statistics-interval");
3912
559b6c93
PL
3913 {
3914 SuffixMatchNode dontThrottleNames;
3915 vector<string> parts;
3916 stringtok(parts, ::arg()["dont-throttle-names"]);
3917 for (const auto &p : parts) {
3918 dontThrottleNames.add(DNSName(p));
3919 }
3920 g_dontThrottleNames.setState(dontThrottleNames);
3921
3922 NetmaskGroup dontThrottleNetmasks;
3923 stringtok(parts, ::arg()["dont-throttle-netmasks"]);
3924 for (const auto &p : parts) {
3925 dontThrottleNetmasks.addMask(Netmask(p));
3926 }
3927 g_dontThrottleNetmasks.setState(dontThrottleNetmasks);
3928 }
3929
144040be 3930 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
078be17f
RG
3931 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
3932 s_balancingFactor = 0.0;
3933 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
3934 }
144040be 3935
810ff705
RG
3936#ifdef SO_REUSEPORT
3937 g_reusePort = ::arg().mustDo("reuseport");
3938#endif
3939
b243ca3b 3940 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3941
b243ca3b
RG
3942 if (g_reusePort) {
3943 if (g_weDistributeQueries) {
3944 /* first thread is the handler, then distributors */
3945 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3946 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3947 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3948 makeUDPServerSockets(deferredAdds);
adb6cd72 3949 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3950 }
3951 }
3952 else {
3953 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3954 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3955 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3956 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3957 makeUDPServerSockets(deferredAdds);
adb6cd72 3958 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3959 }
810ff705
RG
3960 }
3961 }
3962 else {
c47f201b 3963 std::set<int> tcpSockets;
b243ca3b
RG
3964 /* we don't have reuseport so we can only open one socket per
3965 listening addr:port and everyone will listen on it */
3966 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3967 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3968
3969 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3970 needs to listen to the shared sockets */
3971 if (g_weDistributeQueries) {
3972 /* first thread is the handler, then distributors */
3973 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3974 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3975 }
3976 }
3977 else {
3978 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3979 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3980 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3981 }
3982 }
810ff705 3983 }
815099b2 3984
af1377b7
NC
3985#ifdef NOD_ENABLED
3986 // Setup newly observed domain globals
3987 setupNODGlobal();
3988#endif /* NOD_ENABLED */
3989
677e2a46
BH
3990 int forks;
3991 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3992 if(!fork()) // we are child
3993 break;
3994 }
3ddb9247 3995
f7c1d4e3 3996 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3997 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3998 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
3999 daemonize();
4000 }
4001 signal(SIGUSR1,usr1Handler);
4002 signal(SIGUSR2,usr2Handler);
4003 signal(SIGPIPE,SIG_IGN);
810ff705 4004
a6414fdc 4005 checkOrFixFDS();
3ddb9247 4006
d1b28475
KM
4007#ifdef HAVE_LIBSODIUM
4008 if (sodium_init() == -1) {
e6a9dde5 4009 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
4010 exit(99);
4011 }
4012#endif
4013
3afde9b2
PL
4014 openssl_thread_setup();
4015 openssl_seed();
e97cb679
AT
4016 /* setup rng before chroot */
4017 dns_random_init();
3afde9b2 4018
bdbb07e0 4019 if(::arg()["server-id"].empty()) {
d0983bff 4020 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
4021 }
4022
138435cb
BH
4023 int newgid=0;
4024 if(!::arg()["setgid"].empty())
4025 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
4026 int newuid=0;
4027 if(!::arg()["setuid"].empty())
4028 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
4029
f1d6a7ce
KM
4030 Utility::dropGroupPrivs(newuid, newgid);
4031
138435cb 4032 if (!::arg()["chroot"].empty()) {
75336810
PL
4033#ifdef HAVE_SYSTEMD
4034 char *ns;
4035 ns = getenv("NOTIFY_SOCKET");
4036 if (ns != nullptr) {
e6a9dde5 4037 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
4038 exit(1);
4039 }
4040#endif
138435cb 4041 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 4042 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
4043 exit(1);
4044 }
f0f3f0b0 4045 else
377602e3 4046 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
4047 }
4048
f0f3f0b0
PL
4049 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4050 if(!s_pidfname.empty())
4051 unlink(s_pidfname.c_str()); // remove possible old pid file
4052 writePid();
4053
4054 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4055
f1d6a7ce 4056 Utility::dropUserPrivs(newuid);
1f2b341e
RG
4057 try {
4058 /* we might still have capabilities remaining, for example if we have been started as root
4059 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4060 like CAP_NET_BIND_SERVICE.
4061 */
4062 dropCapabilities();
4063 }
4064 catch(const std::exception& e) {
4065 g_log<<Logger::Warning<<e.what()<<endl;
4066 }
c0063e60 4067
e6ec15bf
RG
4068 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4069
49a699c4 4070 makeThreadPipes();
3ddb9247 4071
5d4dd7fe
BH
4072 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4073 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 4074 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 4075 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 4076
c29d820c
RG
4077 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4078
563517f3
RG
4079 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4080 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4081 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4082 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
72259676 4083
d705aad9
RG
4084 if (::arg().mustDo("snmp-agent")) {
4085 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4086 g_snmpAgent->run();
4087 }
4088
b47026fd 4089 int port = ::arg().asNum("udp-source-port-min");
58da9034 4090 if(port < 1024 || port > 65535){
e6a9dde5 4091 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
4092 exit(99); // this isn't going to fix itself either
4093 }
4094 s_minUdpSourcePort = port;
b47026fd 4095 port = ::arg().asNum("udp-source-port-max");
58da9034 4096 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 4097 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
4098 exit(99); // this isn't going to fix itself either
4099 }
4100 s_maxUdpSourcePort = port;
4101 std::vector<string> parts {};
b47026fd 4102 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
4103 for (const auto &part : parts)
4104 {
4105 port = std::stoi(part);
58da9034 4106 if(port < 1024 || port > 65535){
e6a9dde5 4107 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
4108 exit(99); // this isn't going to fix itself either
4109 }
4110 s_avoidUdpSourcePorts.insert(port);
4111 }
4112
b243ca3b 4113 unsigned int currentThreadId = 1;
8fd25133 4114 const auto cpusMap = parseCPUMap();
d77abca1 4115
c3828c03 4116 if(g_numThreads == 1) {
e6a9dde5 4117 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
4118#ifdef HAVE_SYSTEMD
4119 sd_notify(0, "READY=1");
4120#endif
b243ca3b
RG
4121
4122 /* This thread handles the web server, carbon, statistics and the control channel */
4123 auto& handlerInfos = s_threadInfos.at(0);
4124 handlerInfos.isHandler = true;
c390b2da 4125 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
4126
4127 setCPUMap(cpusMap, currentThreadId, pthread_self());
4128
4129 auto& infos = s_threadInfos.at(currentThreadId);
4130 infos.isListener = true;
4131 infos.isWorker = true;
c390b2da 4132 recursorThread(currentThreadId++, "worker");
76698c6e
BH
4133 }
4134 else {
8fd25133 4135
b243ca3b
RG
4136 if (g_weDistributeQueries) {
4137 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4138 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4139 auto& infos = s_threadInfos.at(currentThreadId);
4140 infos.isListener = true;
c390b2da 4141 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
4142
4143 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4144 }
4145 }
8fd25133 4146
62b549e0
RG
4147 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4148
b243ca3b
RG
4149 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4150 auto& infos = s_threadInfos.at(currentThreadId);
4151 infos.isListener = g_weDistributeQueries ? false : true;
4152 infos.isWorker = true;
c390b2da 4153 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b
RG
4154
4155 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 4156 }
b243ca3b 4157
6b6720de
PL
4158#ifdef HAVE_SYSTEMD
4159 sd_notify(0, "READY=1");
4160#endif
b243ca3b
RG
4161
4162 /* This thread handles the web server, carbon, statistics and the control channel */
4163 auto& infos = s_threadInfos.at(0);
4164 infos.isHandler = true;
c390b2da 4165 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b
RG
4166
4167 s_threadInfos.at(0).thread.join();
bb4bdbaf 4168 }
bb4bdbaf
BH
4169 return 0;
4170}
4171
c390b2da 4172static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
4173try
4174{
d77abca1 4175 t_id=n;
b243ca3b 4176 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
4177
4178 static string threadPrefix = "pdns-r/";
519f5484 4179 setThreadName(threadPrefix + threadName);
c390b2da 4180
49a699c4 4181 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 4182 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 4183 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
4184 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4185 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 4186 primeHints();
3ddb9247 4187
f26bf547 4188 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 4189
e6a9dde5 4190 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 4191
af1377b7 4192#ifdef NOD_ENABLED
41c542ec
NC
4193 if (threadInfo.isWorker)
4194 setupNODThread();
af1377b7 4195#endif /* NOD_ENABLED */
c1751a59
RG
4196
4197 /* the listener threads handle TCP queries */
4198 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
4199 try {
4200 if(!::arg()["lua-dns-script"].empty()) {
4201 t_pdl = std::make_shared<RecursorLua4>();
4202 t_pdl->loadFile(::arg()["lua-dns-script"]);
4203 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4204 }
4205 }
4206 catch(std::exception &e) {
4207 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4208 _exit(99);
674cf0f6 4209 }
674cf0f6 4210 }
3ddb9247 4211
f8f243b0 4212 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 4213 if(ringsize) {
f26bf547 4214 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
4215 if(g_weDistributeQueries)
4216 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 4217 else
3ddb9247 4218 t_remotes->set_capacity(ringsize);
f26bf547 4219 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4220 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
4221 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4222 t_bogusremotes->set_capacity(ringsize);
f26bf547 4223 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4224 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 4225 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4226 t_timeouts->set_capacity(ringsize);
92011b8f 4227
f26bf547 4228 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4229 t_queryring->set_capacity(ringsize);
f26bf547 4230 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4231 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
4232 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4233 t_bogusqueryring->set_capacity(ringsize);
92011b8f 4234 }
3ddb9247 4235
f26bf547 4236 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
144040be 4237 threadInfo.mt = MT.get();
3ddb9247 4238
63341e8d
RG
4239#ifdef HAVE_PROTOBUF
4240 /* start protobuf export threads if needed */
4241 auto luaconfsLocal = g_luaconfs.getLocal();
4242 checkProtobufExport(luaconfsLocal);
4243 checkOutgoingProtobufExport(luaconfsLocal);
4244#endif /* HAVE_PROTOBUF */
b9fa43e0
OM
4245#ifdef HAVE_FSTRM
4246 checkFrameStreamExport(luaconfsLocal);
4247#endif
63341e8d 4248
bb4bdbaf
BH
4249 PacketID pident;
4250
4251 t_fdm=getMultiplexer();
d77abca1 4252
b243ca3b 4253 if(threadInfo.isHandler) {
d07bf7ff 4254 if(::arg().mustDo("webserver")) {
e6a9dde5 4255 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 4256 try {
1ce57618 4257 new RecursorWebServer(t_fdm);
8989097d
CH
4258 }
4259 catch(PDNSException &e) {
e6a9dde5 4260 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
4261 exit(99);
4262 }
f3d1d67b 4263 }
377602e3 4264 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 4265 }
810ff705 4266 else {
d77abca1 4267
b243ca3b
RG
4268 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4269 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4270
4271 if (threadInfo.isListener) {
4272 if (g_reusePort) {
4273 /* then every listener has its own FDs */
4274 for(const auto deferred : threadInfo.deferredAdds) {
4275 t_fdm->addReadFD(deferred.first, deferred.second);
4276 }
810ff705 4277 }
b243ca3b
RG
4278 else {
4279 /* otherwise all listeners are listening on the same ones */
4280 for(const auto deferred : g_deferredAdds) {
4281 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4282 }
4283 }
4284 }
810ff705 4285 }
3ddb9247 4286
b0b37121 4287 registerAllStats();
d77abca1 4288
b243ca3b 4289 if(threadInfo.isHandler) {
674cf0f6
BH
4290 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4291 }
1bc3c142 4292
f7c1d4e3 4293 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4294
f7c1d4e3 4295 bool listenOnTCP(true);
49a699c4 4296
cb1523d1 4297 time_t last_stat = 0;
a2f87dd1 4298 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4299 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4300 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 4301 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 4302 for(;;) {
ac0e821b 4303 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4304
3427fa8a
BH
4305 if(!(counter%500)) {
4306 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4307 }
4308
d2392145 4309 if(!(counter%55)) {
d8f6d49f 4310 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4311 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4312
f7c1d4e3 4313 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 4314 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4315 if(g_logCommonErrors)
e6a9dde5 4316 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4317 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4318 }
4319 }
3ddb9247 4320
f7c1d4e3
BH
4321 counter++;
4322
b243ca3b 4323 if(threadInfo.isHandler) {
cb1523d1
RG
4324 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4325 doStats();
4326 last_stat = g_now.tv_sec;
4327 }
f7c1d4e3 4328
cb1523d1 4329 Utility::gettimeofday(&g_now, 0);
2c78bd57 4330
cb1523d1
RG
4331 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4332 MT->makeThread(doCarbonDump, 0);
4333 last_carbon = g_now.tv_sec;
4334 }
2c78bd57 4335 }
2a0276a9 4336 if (t_pdl != nullptr) {
9adbe790 4337 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
4338 /* remember that the listener threads handle TCP queries */
4339 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
4340 // Only on threads processing queries
4341 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4342 t_pdl->maintenance();
4343 last_lua_maintenance = g_now.tv_sec;
4344 }
9adbe790 4345 }
a2f87dd1 4346 }
2c78bd57 4347
bb4bdbaf 4348 t_fdm->run(&g_now);
3ea54bf0 4349 // 'run' updates g_now for us
f7c1d4e3 4350
b243ca3b 4351 if(threadInfo.isListener) {
5c889cf5 4352 if(listenOnTCP) {
c47f201b
RG
4353 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4354 for(const auto fd : threadInfo.tcpSockets) {
4355 t_fdm->removeReadFD(fd);
b243ca3b 4356 }
c47f201b
RG
4357 listenOnTCP=false;
4358 }
f7c1d4e3 4359 }
5c889cf5 4360 else {
c47f201b
RG
4361 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4362 for(const auto fd : threadInfo.tcpSockets) {
4363 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4364 }
c47f201b
RG
4365 listenOnTCP=true;
4366 }
f7c1d4e3
BH
4367 }
4368 }
4369 }
4370}
3f81d239 4371catch(PDNSException &ae) {
e6a9dde5 4372 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4373 return 0;
4374}
4375catch(std::exception &e) {
e6a9dde5 4376 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4377 return 0;
4378}
4379catch(...) {
e6a9dde5 4380 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4381 return 0;
4382}
4383
51e2144e 4384
3ddb9247 4385int main(int argc, char **argv)
288f4aa9 4386{
dbd23fc2
BH
4387 g_argc = argc;
4388 g_argv = argv;
5e3de507 4389 g_stats.startupTime=time(0);
b51ef4f9 4390 Utility::srandom();
3e135495 4391 versionSetProduct(ProductRecursor);
8a63d3ce 4392 reportBasicTypes();
0007c2e5 4393 reportOtherTypes();
ea634573 4394
22030c37 4395 int ret = EXIT_SUCCESS;
caa6eefa 4396
288f4aa9 4397 try {
f888311c 4398 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4399 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4400 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4401 ::arg().set("local-port","port to listen on")="53";
32252594 4402 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4403 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4404 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4405 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4406 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4407 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4408 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4409 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4410 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4411 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4412 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4413 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
4414 ::arg().set("chroot","switch to chroot jail")="";
4415 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4416 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 4417 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4418 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4419 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4420 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4421 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4422 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4423 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4424 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4425 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4426 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4427 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4428 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
8ca656a8 4429 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
cc08b5a9 4430 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4431 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4432 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4433 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4434 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4435
0ec489bf 4436 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4437 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4438 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4439 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4440 ::arg().set("socket-owner","Owner of socket")="";
4441 ::arg().set("socket-group","Group of socket")="";
4442 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4443
f0f3f0b0 4444 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
4445 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4446 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4447 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4448 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4449 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4450 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 4451 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4452 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
559b6c93
PL
4453 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4454 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
2e3d8a19 4455 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4456 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4457 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
b9473937 4458 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
c3e753c7 4459 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4460 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4461 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4462 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4463 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4464 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4465 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4466 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4467 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4468 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4469 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4470 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4471 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4472 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4473 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4474 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4475 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 4476
5605c067 4477 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4478 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4479 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4480 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4481 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4482 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4483 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4484 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4485 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4486 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4487 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4488 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
fd8898fb 4489 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
35695d18 4490 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
fd8898fb 4491 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5cf4b2e7 4492 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
ed9019c9 4493 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
3f975863 4494 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4495 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4496 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4497 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4498 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4499 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4500 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4501 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4502 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
4503 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4504 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 4505 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4506 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4507 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4508 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4509 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
c29d820c 4510 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
ee271fc4 4511 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
a09a8ce0 4512
68e6df3c 4513 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4514 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4515
4516 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4517
d705aad9 4518 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4519 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4520
72259676
RG
4521 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4522 for (size_t idx = 0; idx < 32; idx++) {
4523 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4524 }
4525 for (size_t idx = 0; idx < 128; idx++) {
4526 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4527 }
563517f3
RG
4528 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4529 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4530 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4531 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
d705aad9 4532
0735b17e 4533 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4534 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4535
8fd25133
RG
4536 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4537
98d36505
RG
4538 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4539
5cc8371b 4540 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4541 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4542
58da9034 4543 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4544 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4545 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4546 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 4547 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
144040be 4548 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
116d1288 4549 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="no";
af1377b7
NC
4550#ifdef NOD_ENABLED
4551 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4552 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4553 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4554 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4555 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4556 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4557 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
4558 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4559 ::arg().set("unique-response-log", "Log unique responses")="yes";
4560 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4561 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4562 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 4563#endif /* NOD_ENABLED */
2e3d8a19 4564 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4565 ::arg().setCmd("version","Print version string");
d5141417 4566 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 4567 g_log.toConsole(Logger::Info);
2e3d8a19 4568 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4569
2d733c0f
CH
4570 string configname=::arg()["config-dir"]+"/recursor.conf";
4571 if(::arg()["config-name"]!="") {
4572 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4573 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4574 }
4575 cleanSlashes(configname);
5124de27 4576
5cc1ea1d
CH
4577 if(!::arg().getCommands().empty()) {
4578 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4579 exit(99);
4580 }
4581
577cf284
BH
4582 if(::arg().mustDo("config")) {
4583 cout<<::arg().configstring()<<endl;
4584 exit(0);
4585 }
4586
3ddb9247 4587 if(!::arg().file(configname.c_str()))
e6a9dde5 4588 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4589
2e3d8a19 4590 ::arg().parse(argc,argv);
c836dc19 4591
2054afbb
CH
4592 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4593 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4594 exit(EXIT_FAILURE);
4595 }
4596
4597 if (::arg()["socket-dir"].empty()) {
4598 if (::arg()["chroot"].empty())
4599 ::arg().set("socket-dir") = LOCALSTATEDIR;
4600 else
4601 ::arg().set("socket-dir") = "/";
4602 }
4603
2e3d8a19 4604 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4605
b243ca3b
RG
4606 if(::arg().asNum("threads")==1) {
4607 if (::arg().mustDo("pdns-distributes-queries")) {
4608 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4609 ::arg().set("pdns-distributes-queries")="no";
4610 }
4611 }
4612
4613 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4614 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4615 ::arg().set("distributor-threads")="1";
4616 }
4617
4618 if (!::arg().mustDo("pdns-distributes-queries")) {
4619 ::arg().set("distributor-threads")="0";
4620 }
61d74169 4621
2e3d8a19 4622 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4623 cout<<"syntax:"<<endl<<endl;
4624 cout<<::arg().helpstring(::arg()["help"])<<endl;
4625 exit(0);
b636533b 4626 }
5e3de507 4627 if(::arg().mustDo("version")) {
ba1a571d 4628 showProductVersion();
3613a51c 4629 showBuildConfiguration();
67076869 4630 exit(0);
5e3de507 4631 }
b636533b 4632
34162f8f 4633 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4634
34162f8f
CH
4635 if (logUrgency < Logger::Error)
4636 logUrgency = Logger::Error;
f48d7b65 4637 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4638 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4639 }
e6a9dde5
PL
4640 g_log.setLoglevel(logUrgency);
4641 g_log.toConsole(logUrgency);
34162f8f 4642
f7c1d4e3 4643 serviceMain(argc, argv);
288f4aa9 4644 }
3f81d239 4645 catch(PDNSException &ae) {
e6a9dde5 4646 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4647 ret=EXIT_FAILURE;
288f4aa9 4648 }
fdbf35ac 4649 catch(std::exception &e) {
e6a9dde5 4650 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4651 ret=EXIT_FAILURE;
288f4aa9
BH
4652 }
4653 catch(...) {
e6a9dde5 4654 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4655 ret=EXIT_FAILURE;
288f4aa9 4656 }
3ddb9247 4657
22030c37 4658 return ret;
288f4aa9 4659}