]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
dnsdist: Add HTTPStatusAction to return a specific HTTP response
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
d6f3fcfa 91#include "pubsuffix.hh"
af1377b7
NC
92#ifdef NOD_ENABLED
93#include "nod.hh"
94#endif /* NOD_ENABLED */
f3c18728 95
d9d3f9c1 96#include "rec-protobuf.hh"
d705aad9 97#include "rec-snmp.hh"
aa7929a3 98
6b6720de
PL
99#ifdef HAVE_SYSTEMD
100#include <systemd/sd-daemon.h>
101#endif
102
d187038c
RG
103#include "namespaces.hh"
104
d61aa945
RG
105#ifdef HAVE_PROTOBUF
106#include "uuid-utils.hh"
b9fa43e0 107#endif /* HAVE_PROTOBUF */
d61aa945 108
5cc8371b
RG
109#include "xpf.hh"
110
d187038c
RG
111typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
f26bf547 113static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 114static thread_local unsigned int t_id = 0;
f26bf547
RG
115static thread_local std::shared_ptr<Regex> t_traceRegex;
116static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 117#ifdef HAVE_PROTOBUF
3fe06137 118static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 119static thread_local uint64_t t_protobufServersGeneration;
3fe06137 120static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 121static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 122#endif /* HAVE_PROTOBUF */
f26bf547 123
b9fa43e0 124#ifdef HAVE_FSTRM
10ba6d01 125static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
b9fa43e0
OM
126static thread_local uint64_t t_frameStreamServersGeneration;
127#endif /* HAVE_FSTRM */
128
f26bf547
RG
129thread_local std::unique_ptr<MT_t> MT; // the big MTasker
130thread_local std::unique_ptr<MemRecursorCache> t_RC;
131thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 132thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 133thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 134thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 135thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
136#ifdef NOD_ENABLED
137thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 138thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 139#endif /* NOD_ENABLED */
d187038c 140__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 141
b243ca3b
RG
142typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
143
d7dae798 144// for communicating with our threads
b243ca3b
RG
145// effectively readonly after startup
146struct RecThreadInfo
147{
148 struct ThreadPipeSet
149 {
150 int writeToThread{-1};
151 int readToThread{-1};
152 int writeFromThread{-1};
153 int readFromThread{-1};
154 int writeQueriesToThread{-1}; // this one is non-blocking
155 int readQueriesToThread{-1};
156 };
157
adb6cd72 158 /* FD corresponding to TCP sockets this thread is listening
c47f201b 159 on.
adb6cd72
RG
160 These FDs are also in deferredAdds when we have one
161 socket per listener, and in g_deferredAdds instead. */
162 std::set<int> tcpSockets;
b243ca3b
RG
163 /* FD corresponding to listening sockets if we have one socket per
164 listener (with reuseport), otherwise all listeners share the
165 same FD and g_deferredAdds is then used instead */
166 deferredAdd_t deferredAdds;
167 struct ThreadPipeSet pipes;
168 std::thread thread;
144040be
RG
169 MT_t* mt{nullptr};
170 uint64_t numberOfDistributedQueries{0};
b243ca3b
RG
171 /* handle the web server, carbon, statistics and the control channel */
172 bool isHandler{false};
173 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
174 bool isListener{false};
175 /* process queries */
176 bool isWorker{false};
49a699c4 177};
810ff705 178
b243ca3b
RG
179/* first we have the handler thread, t_id == 0 (some other
180 helper threads like SNMP might have t_id == 0 as well)
181 then the distributor threads if any
182 and finally the workers */
183static std::vector<RecThreadInfo> s_threadInfos;
184/* without reuseport, all listeners share the same sockets */
185static deferredAdd_t g_deferredAdds;
faf580f5 186
d187038c
RG
187typedef vector<int> tcpListenSockets_t;
188typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 189
d187038c 190static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 191static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
192static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
193static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
194static AtomicCounter counter;
9065eb05 195static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 196static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 197static NetmaskGroup g_XPFAcl;
d187038c 198static size_t g_tcpMaxQueriesPerConn;
a5886e6a 199static size_t s_maxUDPQueriesPerRound;
d187038c
RG
200static uint64_t g_latencyStatSize;
201static uint32_t g_disthashseed;
202static unsigned int g_maxTCPPerClient;
d187038c 203static unsigned int g_maxMThreads;
b243ca3b 204static unsigned int g_numDistributorThreads;
d187038c
RG
205static unsigned int g_numWorkerThreads;
206static int g_tcpTimeout;
207static uint16_t g_udpTruncationThreshold;
59cb4a79 208static uint16_t g_xpfRRCode{0};
d187038c
RG
209static std::atomic<bool> statsWanted;
210static std::atomic<bool> g_quiet;
211static bool g_logCommonErrors;
212static bool g_anyToTcp;
b243ca3b 213static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 214static bool g_reusePort{false};
00b8cadc 215static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 216static time_t g_statisticsInterval;
9065eb05 217static bool g_useIncomingECS;
c29d820c 218static bool g_useKernelTimestamp;
a6f7f5fe 219std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
220#ifdef NOD_ENABLED
221static bool g_nodEnabled;
222static DNSName g_nodLookupDomain;
223static bool g_nodLog;
224static SuffixMatchNode g_nodDomainWL;
ca2526f5 225static std::string g_nod_pbtag;
41c542ec
NC
226static bool g_udrEnabled;
227static bool g_udrLog;
ca2526f5 228static std::string g_udr_pbtag;
af1377b7 229#endif /* NOD_ENABLED */
f097141b 230#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 231static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
232#else
233static std::set<uint16_t> s_avoidUdpSourcePorts;
234#endif
bf6f28ca
CHB
235static uint16_t s_minUdpSourcePort;
236static uint16_t s_maxUdpSourcePort;
144040be 237static double s_balancingFactor;
49a699c4 238
b243ca3b 239RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 240RecursorStats g_stats;
2d733c0f 241string s_programname="pdns_recursor";
d187038c 242string s_pidfname;
c1c29961 243bool g_lowercaseOutgoing;
bf19ccfd 244unsigned int g_networkTimeoutMsec;
d187038c
RG
245unsigned int g_numThreads;
246uint16_t g_outgoingEDNSBufsize;
98d36505 247bool g_logRPZChanges{false};
c3828c03 248
559b6c93
PL
249// Used in the Syncres to not throttle certain servers
250GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
251GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
252
12cd44ee 253#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 254#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 255// Bad Nets taken from both:
3ddb9247 256// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 257// and
258// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
259// where such a network may not be considered a valid destination
260#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
261#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 262
d7dae798 263//! used to send information to a newborn mthread
ea634573 264struct DNSComboWriter {
08b02366 265 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
266 {
267 }
5cc8371b 268
08b02366 269 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
270 {
271 }
272
5cc8371b
RG
273 void setRemote(const ComboAddress& sa)
274 {
275 d_remote=sa;
276 }
277
278 void setSource(const ComboAddress& sa)
ea634573 279 {
5cc8371b 280 d_source=sa;
ea634573
BH
281 }
282
b71b60ee 283 void setLocal(const ComboAddress& sa)
284 {
285 d_local=sa;
286 }
287
5cc8371b
RG
288 void setDestination(const ComboAddress& sa)
289 {
290 d_destination=sa;
291 }
b71b60ee 292
ea634573
BH
293 void setSocket(int sock)
294 {
295 d_socket=sock;
296 }
a1754c6a
BH
297
298 string getRemote() const
299 {
5cc8371b
RG
300 if (d_source == d_remote) {
301 return d_source.toStringWithPort();
302 }
303 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
304 }
305
5cc8371b 306 MOADNSParser d_mdp;
c9e9e5e0 307 struct timeval d_now;
5cc8371b
RG
308 /* Remote client, might differ from d_source
309 in case of XPF, in which case d_source holds
310 the IP of the client and d_remote of the proxy
311 */
312 ComboAddress d_remote;
313 ComboAddress d_source;
314 /* Destination address, might differ from
315 d_destination in case of XPF, in which case
316 d_destination holds the IP of the proxy and
317 d_local holds our own. */
318 ComboAddress d_local;
319 ComboAddress d_destination;
aa7929a3
RG
320#ifdef HAVE_PROTOBUF
321 boost::uuids::uuid d_uuid;
67e31ebe 322 string d_requestorId;
590388d2 323 string d_deviceId;
0a6a45c8 324 string d_deviceName;
c29d820c 325 struct timeval d_kernelTimestamp{0,0};
aa7929a3 326#endif
08b02366 327 std::string d_query;
5164bac3
RG
328 std::vector<std::string> d_policyTags;
329 LuaContext::LuaObject d_data;
b40562da 330 EDNSSubnetOpts d_ednssubnet;
5164bac3 331 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 332 int d_socket;
b673817a 333 unsigned int d_tag{0};
e9f63d47 334 uint32_t d_qhash{0};
70fb28d9 335 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
336 uint16_t d_ecsBegin{0};
337 uint16_t d_ecsEnd{0};
70fb28d9 338 bool d_variable{false};
5164bac3
RG
339 bool d_ecsFound{false};
340 bool d_ecsParsed{false};
341 bool d_tcp;
ea634573
BH
342};
343
06857845
RG
344MT_t* getMT()
345{
346 return MT ? MT.get() : nullptr;
347}
ea634573 348
288f4aa9
BH
349ArgvMap &arg()
350{
351 static ArgvMap theArg;
352 return theArg;
353}
4ef015cd 354
8fb594ba 355unsigned int getRecursorThreadId()
b4015453 356{
30da2030 357 return t_id;
b4015453 358}
09e6702a 359
30ee601a
RG
360int getMTaskerTID()
361{
362 return MT->getTid();
363}
364
b243ca3b
RG
365static bool isDistributorThread()
366{
367 if (t_id == 0) {
368 return false;
369 }
370
371 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
372}
373
374static bool isHandlerThread()
375{
376 if (t_id == 0) {
377 return true;
378 }
379
380 return s_threadInfos.at(t_id).isHandler;
381}
382
d187038c 383static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 384
50c81227 385// -1 is error, 0 is timeout, 1 is success
3ddb9247 386int asendtcp(const string& data, Socket* sock)
5c633640
BH
387{
388 PacketID pident;
389 pident.sock=sock;
390 pident.outMSG=data;
3ddb9247 391
bb4bdbaf 392 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 393 string packet;
5c633640 394
5b0ddd18 395 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 396
9170fbaf 397 if(!ret || ret==-1) { // timeout
bb4bdbaf 398 t_fdm->removeWriteFD(sock->getHandle());
5c633640 399 }
50c81227
BH
400 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
401 return -1;
402 }
9170fbaf 403 return ret;
5c633640
BH
404}
405
d187038c 406static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 407
9170fbaf 408// -1 is error, 0 is timeout, 1 is success
a683e8bd 409int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 410{
50c81227 411 data.clear();
5c633640
BH
412 PacketID pident;
413 pident.sock=sock;
414 pident.inNeeded=len;
825fa717 415 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 416 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 417
bb4bdbaf 418 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 419 if(!ret || ret==-1) { // timeout
bb4bdbaf 420 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 421 }
50c81227
BH
422 else if(data.empty()) {// error, EOF or other
423 return -1;
424 }
425
9170fbaf 426 return ret;
288f4aa9
BH
427}
428
d187038c 429static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 430{
fba1e944 431 PacketID pident=*any_cast<PacketID>(&var);
4465e941 432 char resp[512];
7c77ce63
RG
433 ComboAddress fromaddr;
434 socklen_t addrlen=sizeof(fromaddr);
435
436 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
437 if (fromaddr != pident.remote) {
e6a9dde5 438 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
439
440 }
441
4465e941 442 t_fdm->removeReadFD(fd);
443 if(ret >= 0) {
a683e8bd 444 string data(resp, (size_t) ret);
fba1e944 445 MT->sendEvent(pident, &data);
4465e941 446 }
447 else {
fba1e944 448 string empty;
449 MT->sendEvent(pident, &empty);
450 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 451 }
452}
fba1e944 453string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 454{
4465e941 455 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
456 s.setNonBlocking();
457 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
458
459 s.bind(local);
460 s.connect(dest);
4465e941 461 s.send(query);
462
463 PacketID pident;
464 pident.sock=&s;
7c77ce63 465 pident.remote=dest;
4465e941 466 pident.type=0;
fba1e944 467 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 468
469 string data;
fba1e944 470
4465e941 471 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 472
4465e941 473 if(!ret || ret==-1) { // timeout
4465e941 474 t_fdm->removeReadFD(s.getHandle());
475 }
476 else if(data.empty()) {// error, EOF or other
fba1e944 477 // we could special case this
4465e941 478 return data;
479 }
4465e941 480 return data;
481}
482
d7dae798 483//! pick a random query local address
1652a63e 484ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 485{
1652a63e 486 ComboAddress ret;
5a38281c 487 if(family==AF_INET) {
3ddb9247 488 if(g_localQueryAddresses4.empty())
1652a63e 489 ret = g_local4;
3ddb9247 490 else
1652a63e
BH
491 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
492 ret.sin4.sin_port = htons(port);
5a38281c
BH
493 }
494 else {
495 if(g_localQueryAddresses6.empty())
1652a63e
BH
496 ret = g_local6;
497 else
498 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 499
1652a63e 500 ret.sin6.sin6_port = htons(port);
5a38281c 501 }
1652a63e 502 return ret;
5a38281c 503}
4ef015cd 504
d187038c 505static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 506
d187038c 507static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
508{
509 uint32_t psize=0;
510 socklen_t len=sizeof(psize);
3ddb9247 511
d7dae798 512 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 513 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 514 return;
d7dae798
BH
515 }
516
517 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 518 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
519}
520
521
522static void setSocketReceiveBuffer(int fd, uint32_t size)
523{
524 setSocketBuffer(fd, SO_RCVBUF, size);
525}
526
527static void setSocketSendBuffer(int fd, uint32_t size)
528{
529 setSocketBuffer(fd, SO_SNDBUF, size);
530}
531
532
4ef015cd
BH
533// you can ask this class for a UDP socket to send a query from
534// this socket is not yours, don't even think about deleting it
535// but after you call 'returnSocket' on it, don't assume anything anymore
536class UDPClientSocks
537{
4ef015cd 538 unsigned int d_numsocks;
4ef015cd 539public:
e2642526 540 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
541 {
542 }
543
2ee280cf 544 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 545 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 546 {
d8f6d49f
BH
547 *fd=makeClientSocket(toaddr.sin4.sin_family);
548 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 549 return -2;
d8f6d49f
BH
550
551 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
552 int err = errno;
a7b68ae7
RG
553 try {
554 closesocket(*fd);
555 }
556 catch(const PDNSException& e) {
e6a9dde5 557 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
558 }
559
d8f6d49f 560 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 561 return -2;
998a4334 562 return -1;
d8f6d49f 563 }
998a4334 564
998a4334 565 d_numsocks++;
d8f6d49f 566 return 0;
4ef015cd
BH
567 }
568
569 // return a socket to the pool, or simply erase it
2bee9b7c 570 void returnSocket(int fd)
4ef015cd 571 {
80baf329 572 try {
2bee9b7c 573 t_fdm->removeReadFD(fd);
80baf329 574 }
2bee9b7c 575 catch(const FDMultiplexerException& e) {
bb4bdbaf 576 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 577 }
2bee9b7c 578
a7b68ae7 579 try {
2bee9b7c 580 closesocket(fd);
a7b68ae7
RG
581 }
582 catch(const PDNSException& e) {
e6a9dde5 583 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 584 }
3ddb9247 585
998a4334 586 --d_numsocks;
4ef015cd 587 }
d8f6d49f 588
2bee9b7c
RG
589private:
590
d8f6d49f 591 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 592 static int makeClientSocket(int family)
d8f6d49f 593 {
a683e8bd 594 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 595
d8f6d49f
BH
596 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
597 return ret;
3ddb9247
PD
598
599 if(ret<0)
335da0ba 600 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 601
7eb73ffa 602 // setCloseOnExec(ret); // we're not going to exec
5a38281c 603
d8f6d49f 604 int tries=10;
3aa91c3e 605 ComboAddress sin;
d8f6d49f 606 while(--tries) {
1652a63e 607 uint16_t port;
3ddb9247 608
d8f6d49f 609 if(tries==1) // fall back to kernel 'random'
4957a608 610 port = 0;
bf6f28ca
CHB
611 else {
612 do {
613 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
614 }
615 while (s_avoidUdpSourcePorts.count(port));
616 }
5a38281c 617
3aa91c3e 618 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 619
3ddb9247 620 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 621 break;
d8f6d49f 622 }
9ec48f21
RG
623
624 if(!tries) {
625 closesocket(ret);
3aa91c3e 626 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
9ec48f21
RG
627 }
628
629 try {
630 setReceiveSocketErrors(ret, family);
631 setNonBlocking(ret);
632 }
633 catch(...) {
634 closesocket(ret);
635 throw;
636 }
3ddb9247 637
d8f6d49f
BH
638 return ret;
639 }
49a699c4
BH
640};
641
f26bf547 642static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 643
288f4aa9 644/* these two functions are used by LWRes */
34801ab1 645// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 646int asendto(const char *data, size_t len, int flags,
3ddb9247 647 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 648{
34801ab1
BH
649
650 PacketID pident;
787e5eab
BH
651 pident.domain = domain;
652 pident.remote = toaddr;
653 pident.type = qtype;
34801ab1
BH
654
655 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
656 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
657
658 for(; chain.first != chain.second; chain.first++) {
659 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 660 /*
4665c31e
BH
661 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
662 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 663 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 664 */
34801ab1
BH
665 chain.first->key.chain.insert(id); // we can chain
666 *fd=-1; // gets used in waitEvent / sendEvent later on
667 return 1;
668 }
669 }
670
49a699c4 671 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
672 if(ret < 0)
673 return ret;
34801ab1 674
998a4334
BH
675 pident.fd=*fd;
676 pident.id=id;
3ddb9247 677
bb4bdbaf
BH
678 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
679 ret = send(*fd, data, len, 0);
680
5b0ddd18 681 int tmp = errno;
bb4bdbaf 682
7302ed0a 683 if(ret < 0)
49a699c4 684 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 685
5b0ddd18 686 errno = tmp; // this is for logging purposes only
7302ed0a 687 return ret;
288f4aa9
BH
688}
689
9170fbaf 690// -1 is error, 0 is timeout, 1 is success
f128d20d 691int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 692 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 693{
0d5f0a9f 694 static optional<unsigned int> nearMissLimit;
3ddb9247 695 if(!nearMissLimit)
0d5f0a9f
BH
696 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
697
288f4aa9 698 PacketID pident;
4ef015cd 699 pident.fd=fd;
288f4aa9 700 pident.id=id;
0d5f0a9f 701 pident.domain=domain;
787e5eab 702 pident.type = qtype;
996c89cc 703 pident.remote=fromaddr;
b636533b 704
5b0ddd18 705 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 706
9ec48f21 707 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
9170fbaf 708 if(ret > 0) {
9ec48f21 709 /* handleUDPServerResponse() will close the socket for us no matter what */
996c89cc 710 if(packet.empty()) // means "error"
3ddb9247 711 return -1;
998a4334 712
a683e8bd 713 *d_len=packet.size();
f128d20d 714
0d5f0a9f 715 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 716 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 717 g_stats.spoofCount++;
35ce8576
BH
718 return -1;
719 }
288f4aa9 720 }
09e6702a 721 else {
9ec48f21 722 /* getting there means error or timeout, it's up to us to close the socket */
34801ab1 723 if(fd >= 0)
49a699c4 724 t_udpclientsocks->returnSocket(fd);
09e6702a 725 }
9170fbaf 726 return ret;
288f4aa9
BH
727}
728
88def049
BH
729static void writePid(void)
730{
191f2e47 731 if(!::arg().mustDo("write-pid"))
732 return;
18e7758c 733 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 734 if(of)
705f31ae 735 of<< Utility::getpid() <<endl;
88def049 736 else
e6a9dde5 737 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
738}
739
2749c3fe 740TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
741{
742 ++s_currentConnections;
cd989c87 743 (*t_tcpClientCounts)[d_remote]++;
0e408828 744}
cd989c87
BH
745
746TCPConnection::~TCPConnection()
0e408828 747{
a7b68ae7
RG
748 try {
749 if(closesocket(d_fd) < 0)
e6a9dde5 750 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
751 }
752 catch(const PDNSException& e) {
e6a9dde5 753 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
754 }
755
3ddb9247 756 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 757 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 758 --s_currentConnections;
0e408828 759}
0e9d9ce2 760
3ddb9247 761AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
762
763static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 764
92011b8f 765// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 766static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 767{
92011b8f 768 if(packetsize > 1000 && t_largeanswerremotes)
769 t_largeanswerremotes->push_back(remote);
2cc13433
BH
770 switch(res) {
771 case RCode::ServFail:
92011b8f 772 if(t_servfailremotes) {
773 t_servfailremotes->push_back(remote);
5af86fdc 774 if(query && t_servfailqueryring) // packet cache
92011b8f 775 t_servfailqueryring->push_back(make_pair(*query, qtype));
776 }
2cc13433
BH
777 g_stats.servFails++;
778 break;
779 case RCode::NXDomain:
780 g_stats.nxDomains++;
781 break;
782 case RCode::NoError:
783 g_stats.noErrors++;
784 break;
785 }
786}
787
9a864da4 788static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 789try
790{
5cc8371b 791 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 792}
793catch(...)
794{
795 return "Exception making error message for exception";
796}
797
aa7929a3 798#ifdef HAVE_PROTOBUF
0a6a45c8 799static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId, const std::string& deviceName)
aa7929a3 800{
b773359c
RG
801 if (!t_protobufServers) {
802 return;
803 }
804
e1c8a4bb
RG
805 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
806 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
807 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 808 message.setServerIdentity(SyncRes::s_serverID);
a94bc5d7 809 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 810 message.setRequestorId(requestorId);
590388d2 811 message.setDeviceId(deviceId);
0a6a45c8 812 message.setDeviceName(deviceName);
02b47f43 813
02b47f43 814 if (!policyTags.empty()) {
d9d3f9c1 815 message.setPolicyTags(policyTags);
02b47f43 816 }
aa7929a3 817
d9d3f9c1 818// cerr <<message.toDebugString()<<endl;
aa7929a3 819 std::string str;
d9d3f9c1 820 message.serialize(str);
b773359c
RG
821
822 for (auto& server : *t_protobufServers) {
823 server->queueData(str);
824 }
aa7929a3
RG
825}
826
b773359c 827static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 828{
b773359c
RG
829 if (!t_protobufServers) {
830 return;
831 }
832
d9d3f9c1 833// cerr <<message.toDebugString()<<endl;
aa7929a3 834 std::string str;
d9d3f9c1 835 message.serialize(str);
b773359c
RG
836
837 for (auto& server : *t_protobufServers) {
838 server->queueData(str);
839 }
aa7929a3
RG
840}
841#endif
842
53508135
PL
843/**
844 * Chases the CNAME provided by the PolicyCustom RPZ policy.
845 *
846 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
847 * @param qtype: The QType of the original query
848 * @param sr: A SyncRes
849 * @param res: An integer that will contain the RCODE of the lookup we do
850 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
851 */
d187038c 852static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
853{
854 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
855 bool oldWantsRPZ = sr.getWantsRPZ();
856 sr.setWantsRPZ(false);
53508135 857 vector<DNSRecord> ans;
6da513b2 858 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
859 for (const auto& rec : ans) {
860 if(rec.d_place == DNSResourceRecord::ANSWER) {
861 ret.push_back(rec);
862 }
863 }
864 // Reset the RPZ state of the SyncRes
30ee601a 865 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
866 }
867}
868
70fb28d9 869static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 870{
70fb28d9 871 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
872
873 if(rec.d_type != QType::OPT) // their TTL ain't real
874 minTTL = min(minTTL, rec.d_ttl);
875
876 rec.d_content->toPacket(pw);
877 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
878 pw.rollback();
879 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
880 pw.getHeader()->tc=1;
881 pw.truncate();
882 }
883 return false;
884 }
885
886 return true;
887}
888
63341e8d 889#ifdef HAVE_PROTOBUF
3fe06137 890static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 891{
3fe06137 892 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
893
894 for (const auto& server : config.servers) {
895 try {
5d6c7a46
RG
896 auto logger = make_unique<RemoteLogger>(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
897 logger->setLogQueries(config.logQueries);
898 logger->setLogResponses(config.logResponses);
899 result->emplace_back(std::move(logger));
b773359c
RG
900 }
901 catch(const std::exception& e) {
902 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
903 }
904 catch(const PDNSException& e) {
905 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
906 }
63341e8d
RG
907 }
908
909 return result;
910}
911
912static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
913{
914 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
915 if (t_protobufServers) {
916 for (auto& server : *t_protobufServers) {
917 server->stop();
918 }
919 t_protobufServers.reset();
63341e8d
RG
920 }
921
922 return false;
923 }
924
925 /* if the server was not running, or if it was running according to a
926 previous configuration */
b773359c
RG
927 if (!t_protobufServers ||
928 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 929
b773359c
RG
930 if (t_protobufServers) {
931 for (auto& server : *t_protobufServers) {
932 server->stop();
933 }
63341e8d 934 }
b773359c 935 t_protobufServers.reset();
63341e8d 936
b773359c
RG
937 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
938 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
939 }
940
941 return true;
942}
943
944static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
945{
946 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
947 if (t_outgoingProtobufServers) {
948 for (auto& server : *t_outgoingProtobufServers) {
949 server->stop();
950 }
63341e8d 951 }
b773359c 952 t_outgoingProtobufServers.reset();
63341e8d
RG
953
954 return false;
955 }
956
957 /* if the server was not running, or if it was running according to a
958 previous configuration */
b773359c
RG
959 if (!t_outgoingProtobufServers ||
960 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 961
b773359c
RG
962 if (t_outgoingProtobufServers) {
963 for (auto& server : *t_outgoingProtobufServers) {
964 server->stop();
965 }
63341e8d 966 }
b773359c 967 t_outgoingProtobufServers.reset();
63341e8d 968
b773359c
RG
969 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
970 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
971 }
972
973 return true;
974}
b9fa43e0
OM
975
976#ifdef HAVE_FSTRM
977
10ba6d01 978static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
b9fa43e0 979{
10ba6d01 980 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
b9fa43e0
OM
981
982 for (const auto& server : config.servers) {
983 try {
573f4ff0
OM
984 std::unordered_map<string,unsigned> options;
985 options["bufferHint"] = config.bufferHint;
986 options["flushTimeout"] = config.flushTimeout;
987 options["inputQueueSize"] = config.inputQueueSize;
988 options["outputQueueSize"] = config.outputQueueSize;
989 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
990 options["reopenInterval"] = config.reopenInterval;
dea8a6bc
OM
991 FrameStreamLogger *fsl = nullptr;
992 try {
993 ComboAddress address(server);
994 fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
995 }
996 catch (const PDNSException& e) {
997 fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
998 }
573f4ff0
OM
999 fsl->setLogQueries(config.logQueries);
1000 fsl->setLogResponses(config.logResponses);
1001 result->emplace_back(fsl);
b9fa43e0
OM
1002 }
1003 catch(const std::exception& e) {
1004 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
1005 }
1006 catch(const PDNSException& e) {
1007 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
1008 }
1009 }
1010
1011 return result;
1012}
1013
1014static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1015{
1016 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1017 if (t_frameStreamServers) {
1018 // dt's take care of cleanup
1019 t_frameStreamServers.reset();
1020 }
1021
1022 return false;
1023 }
1024
1025 /* if the server was not running, or if it was running according to a
1026 previous configuration */
1027 if (!t_frameStreamServers ||
1028 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1029
1030 if (t_frameStreamServers) {
1031 // dt's take care of cleanup
1032 t_frameStreamServers.reset();
1033 }
1034
1035 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1036 t_frameStreamServersGeneration = luaconfsLocal->generation;
1037 }
1038
1039 return true;
1040}
1041#endif /* HAVE_FSTRM */
63341e8d
RG
1042#endif /* HAVE_PROTOBUF */
1043
af1377b7 1044#ifdef NOD_ENABLED
41c542ec 1045static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
1046{
1047 static const QType qt(QType::A);
1048 static const uint16_t qc(QClass::IN);
41c542ec 1049 bool ret = false;
af1377b7
NC
1050 // First check the (sub)domain isn't whitelisted for NOD purposes
1051 if (!g_nodDomainWL.check(dname)) {
1052 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
1053 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1054 if (g_nodLog) {
1055 // This should probably log to a dedicated log file
1056 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
1057 }
1058 if (!(g_nodLookupDomain.isRoot())) {
1059 // Send a DNS A query to <domain>.g_nodLookupDomain
1060 DNSName qname = dname;
1061 vector<DNSRecord> dummy;
1062 qname += g_nodLookupDomain;
1063 directResolve(qname, qt, qc, dummy);
1064 }
41c542ec 1065 ret = true;
af1377b7
NC
1066 }
1067 }
41c542ec 1068 return ret;
af1377b7
NC
1069}
1070
1071static void nodAddDomain(const DNSName& dname)
1072{
1073 // Don't bother adding domains on the nod whitelist
1074 if (!g_nodDomainWL.check(dname)) {
1075 if (t_nodDBp) {
1076 // This keeps the nod info up to date
1077 t_nodDBp->addDomain(dname);
1078 }
1079 }
1080}
41c542ec
NC
1081
1082static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1083{
1084 bool ret = false;
1085 if (record.d_place == DNSResourceRecord::ANSWER ||
1086 record.d_place == DNSResourceRecord::ADDITIONAL) {
1087 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1088 std::stringstream ss;
1089 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1090 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1091 if (g_udrLog) {
1092 // This should also probably log to a dedicated file.
1093 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1094 }
1095 ret = true;
1096 }
1097 }
1098 return ret;
1099}
af1377b7
NC
1100#endif /* NOD_ENABLED */
1101
d187038c 1102static void startDoResolve(void *p)
288f4aa9 1103{
9a864da4 1104 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1105 try {
5af86fdc
RG
1106 if (t_queryring)
1107 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1108
32015748 1109 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1110 EDNSOpts edo;
5164bac3 1111 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1112 bool variableAnswer = dc->d_variable;
8e079f3a 1113 bool haveEDNS=false;
ca2526f5
NC
1114#ifdef NOD_ENABLED
1115 bool hasUDR = false;
1116#endif /* NOD_ENABLED */
f1db0de2
PL
1117 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1118 uint8_t ednsExtRCode = 0;
8e079f3a 1119 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1120 haveEDNS=true;
1121 if (edo.d_version != 0) {
1122 ednsExtRCode = ERCode::BADVERS;
1123 }
1124
32015748
RG
1125 if(!dc->d_tcp) {
1126 /* rfc6891 6.2.3:
1127 "Values lower than 512 MUST be treated as equal to 512."
1128 */
1129 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1130 }
5164bac3 1131 ednsOpts = edo.d_options;
3af35968 1132 maxanswersize -= 11; // EDNS header size
b40562da 1133
1f691b94
PL
1134 for (const auto& o : edo.d_options) {
1135 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1136 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1137 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1138 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1139 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1140 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1141 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1142 variableAnswer = true; // Can't packetcache an answer with NSID
1143 // Option Code and Option Length are both 2
1144 maxanswersize -= 2 + 2 + mode_server_id.size();
1145 }
b40562da
RG
1146 }
1147 }
10321a98 1148 }
b40562da
RG
1149 /* perhaps there was no EDNS or no ECS but by now we looked */
1150 dc->d_ecsParsed = true;
e325f20c 1151 vector<DNSRecord> ret;
ea634573 1152 vector<uint8_t> packet;
b23b8614 1153
ad42489c 1154 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1155 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1156 bool wantsRPZ(true);
1fbc6dc5 1157 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1158 bool logResponse = false;
aa7929a3 1159#ifdef HAVE_PROTOBUF
63341e8d 1160 if (checkProtobufExport(luaconfsLocal)) {
b773359c 1161 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1162 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1163 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
0bd2e252 1164 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1165 pbMessage->setServerIdentity(SyncRes::s_serverID);
d362f7c1 1166 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1167 }
1168#endif /* HAVE_PROTOBUF */
ad42489c 1169
b9fa43e0
OM
1170#ifdef HAVE_FSTRM
1171 checkFrameStreamExport(luaconfsLocal);
1172#endif
1173
3ddb9247 1174 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1175
1176 pw.getHeader()->aa=0;
1177 pw.getHeader()->ra=1;
c154c8a4 1178 pw.getHeader()->qr=1;
bb4bdbaf 1179 pw.getHeader()->tc=0;
ea634573 1180 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1181 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1182 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1183
70fb28d9
RG
1184 /* This is the lowest TTL seen in the records of the response,
1185 so we can't cache it for longer than this value.
1186 If we have a TTL cap, this value can't be larger than the
1187 cap no matter what. */
1188 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1189
1190 SyncRes sr(dc->d_now);
0c43f455 1191
2e921ec6 1192 bool DNSSECOK=false;
3457a2a0 1193 if(t_pdl) {
f26bf547 1194 sr.setLuaEngine(t_pdl);
3457a2a0 1195 }
9eec8c98 1196 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1197 sr.setDoDNSSEC(true);
9eec8c98
PL
1198
1199 // Does the requestor want DNSSEC records?
d6c335ab 1200 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1201 DNSSECOK=true;
1202 g_stats.dnssecQueries++;
1203 }
88c33dca
RG
1204 if (dc->d_mdp.d_header.cd) {
1205 /* Per rfc6840 section 5.9, "When processing a request with
1206 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1207 to return all response data, even data that has failed DNSSEC
1208 validation. */
1209 ++g_stats.dnssecCheckDisabledQueries;
1210 }
1211 if (dc->d_mdp.d_header.ad) {
1212 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1213 indicating that the requester understands and is interested in the
1214 value of the AD bit in the response. This allows a requester to
1215 indicate that it understands the AD bit without also requesting
1216 DNSSEC data via the DO bit. */
1217 ++g_stats.dnssecAuthenticDataQueries;
1218 }
9eec8c98
PL
1219 } else {
1220 // Ignore the client-set CD flag
1221 pw.getHeader()->cd=0;
5b9853c9 1222 }
0c43f455
RG
1223 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1224
4898a348 1225#ifdef HAVE_PROTOBUF
30ee601a 1226 sr.setInitialRequestId(dc->d_uuid);
b773359c 1227 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1228#endif
b9fa43e0
OM
1229#ifdef HAVE_FSTRM
1230 sr.setFrameStreamServers(t_frameStreamServers);
1231#endif
2fe3354d 1232 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1233
904d3219 1234 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1235 bool shouldNotValidate = false;
904d3219 1236
ef3b6cd7
RG
1237 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1238 int res = RCode::NoError;
1f1ca368 1239 DNSFilterEngine::Policy appliedPolicy;
6da513b2 1240 std::vector<DNSRecord> spoofed;
f1c7929a 1241 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1242 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1243 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1244 dq.tag = dc->d_tag;
1245 dq.discardedPolicies = &sr.d_discardedPolicies;
1246 dq.policyTags = &dc->d_policyTags;
1247 dq.appliedPolicy = &appliedPolicy;
1248 dq.currentRecords = &ret;
1249 dq.dh = &dc->d_mdp.d_header;
05c74122 1250 dq.data = dc->d_data;
67e31ebe
RG
1251#ifdef HAVE_PROTOBUF
1252 dq.requestorId = dc->d_requestorId;
590388d2 1253 dq.deviceId = dc->d_deviceId;
0a6a45c8 1254 dq.deviceName = dc->d_deviceName;
67e31ebe 1255#endif
ba21fcfe 1256
6cf96227
PL
1257 if(ednsExtRCode != 0) {
1258 goto sendit;
1259 }
1260
e661a20b 1261 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1262 pw.getHeader()->tc = 1;
1263 res = 0;
1264 variableAnswer = true;
e661a20b
PD
1265 goto sendit;
1266 }
1267
f26bf547 1268 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1269 sr.setLogMode(SyncRes::Store);
1270 tracedQuery=true;
1271 }
3ddb9247 1272
8f7473d7 1273
976ec823 1274 if(!g_quiet || tracedQuery) {
e6a9dde5 1275 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1276 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1277 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1278 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1279 }
e6a9dde5 1280 g_log<<endl;
976ec823 1281 }
c75a6a9e 1282
fededf47 1283 sr.setId(MT->getTid());
67828389 1284 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1285 sr.setCacheOnly();
1286
f26bf547
RG
1287 if (t_pdl) {
1288 t_pdl->prerpz(dq, res);
0a273054
RG
1289 }
1290
db486de5 1291 // Check if the query has a policy attached to it
0a273054 1292 if (wantsRPZ) {
5cc8371b 1293 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1294 }
644dd1da 1295
54be222b 1296 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1297 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1298
30ee601a 1299 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1300 if(wantsRPZ) {
1301 switch(appliedPolicy.d_kind) {
1302 case DNSFilterEngine::PolicyKind::NoAction:
1303 break;
1304 case DNSFilterEngine::PolicyKind::Drop:
1305 g_stats.policyDrops++;
7a25883a 1306 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1307 return;
1308 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1309 g_stats.policyResults[appliedPolicy.d_kind]++;
1310 res=RCode::NXDomain;
1311 goto haveAnswer;
1312 case DNSFilterEngine::PolicyKind::NODATA:
1313 g_stats.policyResults[appliedPolicy.d_kind]++;
1314 res=RCode::NoError;
db486de5 1315 goto haveAnswer;
b8470add
PL
1316 case DNSFilterEngine::PolicyKind::Custom:
1317 g_stats.policyResults[appliedPolicy.d_kind]++;
1318 res=RCode::NoError;
6da513b2
RG
1319 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1320 for (const auto& dr : spoofed) {
1321 ret.push_back(dr);
1322 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1323 }
b8470add
PL
1324 goto haveAnswer;
1325 case DNSFilterEngine::PolicyKind::Truncate:
1326 if(!dc->d_tcp) {
1327 g_stats.policyResults[appliedPolicy.d_kind]++;
1328 res=RCode::NoError;
1329 pw.getHeader()->tc=1;
1330 goto haveAnswer;
1331 }
1332 break;
1333 }
db486de5
PL
1334 }
1335
b8470add 1336 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1337 try {
1338 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1339 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1340 }
1341 catch(ImmediateServFailException &e) {
854d44e3 1342 if(g_logCommonErrors)
e6a9dde5 1343 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1344 res = RCode::ServFail;
1345 }
4485aa35 1346
1921a4c2
RG
1347 dq.validationState = sr.getValidationState();
1348
b8470add
PL
1349 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1350 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1351 appliedPolicy = sr.d_appliedPolicy;
1352 g_stats.policyResults[appliedPolicy.d_kind]++;
1353 switch(appliedPolicy.d_kind) {
1354 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1355 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1356 case DNSFilterEngine::PolicyKind::Drop:
1357 g_stats.policyDrops++;
b8470add
PL
1358 return;
1359 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1360 ret.clear();
1361 res=RCode::NXDomain;
1362 goto haveAnswer;
1363
1364 case DNSFilterEngine::PolicyKind::NODATA:
1365 ret.clear();
1366 res=RCode::NoError;
1367 goto haveAnswer;
1368
1369 case DNSFilterEngine::PolicyKind::Truncate:
1370 if(!dc->d_tcp) {
1371 ret.clear();
1372 res=RCode::NoError;
1373 pw.getHeader()->tc=1;
1374 goto haveAnswer;
1375 }
1376 break;
1377
1378 case DNSFilterEngine::PolicyKind::Custom:
1379 ret.clear();
1380 res=RCode::NoError;
6da513b2
RG
1381 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1382 for (const auto& dr : spoofed) {
1383 ret.push_back(dr);
1384 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1385 }
b8470add
PL
1386 goto haveAnswer;
1387 }
1388 }
1389
1390 if (wantsRPZ) {
1f1ca368 1391 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1392 }
db486de5 1393
f26bf547 1394 if(t_pdl) {
db486de5
PL
1395 if(res == RCode::NoError) {
1396 auto i=ret.cbegin();
1397 for(; i!= ret.cend(); ++i)
1398 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1399 break;
f26bf547 1400 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1401 shouldNotValidate = true;
1402
db486de5 1403 }
f26bf547 1404 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1405 shouldNotValidate = true;
db486de5 1406
f26bf547 1407 if(t_pdl->postresolve(dq, res))
3ca4e735 1408 shouldNotValidate = true;
db486de5
PL
1409 }
1410
b8470add
PL
1411 if (wantsRPZ) { //XXX This block is repeated, see above
1412 g_stats.policyResults[appliedPolicy.d_kind]++;
1413 switch(appliedPolicy.d_kind) {
1414 case DNSFilterEngine::PolicyKind::NoAction:
1415 break;
1416 case DNSFilterEngine::PolicyKind::Drop:
1417 g_stats.policyDrops++;
b8470add
PL
1418 return;
1419 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1420 ret.clear();
1421 res=RCode::NXDomain;
1422 goto haveAnswer;
1423
1424 case DNSFilterEngine::PolicyKind::NODATA:
1425 ret.clear();
1426 res=RCode::NoError;
1427 goto haveAnswer;
1428
1429 case DNSFilterEngine::PolicyKind::Truncate:
1430 if(!dc->d_tcp) {
1431 ret.clear();
1432 res=RCode::NoError;
1433 pw.getHeader()->tc=1;
1434 goto haveAnswer;
1435 }
1436 break;
1437
1438 case DNSFilterEngine::PolicyKind::Custom:
1439 ret.clear();
1440 res=RCode::NoError;
6da513b2
RG
1441 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1442 for (const auto& dr : spoofed) {
1443 ret.push_back(dr);
1444 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1445 }
b8470add
PL
1446 goto haveAnswer;
1447 }
644dd1da 1448 }
4485aa35 1449 }
644dd1da 1450 haveAnswer:;
3e8216c8 1451 if(res == PolicyDecision::DROP) {
e9c2ad3a 1452 g_stats.policyDrops++;
ae7e77ad 1453 return;
3ddb9247 1454 }
9cdfab64 1455 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1456 {
85ffbc53
PD
1457 string trace(sr.getTrace());
1458 if(!trace.empty()) {
1459 vector<string> lines;
1460 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1461 for(const string& line : lines) {
85ffbc53 1462 if(!line.empty())
e6a9dde5 1463 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1464 }
1465 }
1466 }
3ddb9247 1467
9cdfab64 1468 if(res == -1) {
0fe1d080
PD
1469 pw.getHeader()->rcode=RCode::ServFail;
1470 // no commit here, because no record
1471 g_stats.servFails++;
1472 }
288f4aa9 1473 else {
ea634573 1474 pw.getHeader()->rcode=res;
92011b8f 1475
f3fe4ae6 1476 // Does the validation mode or query demand validation?
0c43f455 1477 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1478 try {
f3fe4ae6 1479 if(sr.doLog()) {
e6a9dde5 1480 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1481 }
4d2be65d
RG
1482
1483 auto state = sr.getValidationState();
1484
b25cae9a 1485 if(state == Secure) {
2e921ec6 1486 if(sr.doLog()) {
e6a9dde5 1487 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1488 }
b25cae9a 1489
1490 // Is the query source interested in the value of the ad-bit?
885c8881 1491 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1492 pw.getHeader()->ad=1;
1493 }
1494 else if(state == Insecure) {
f3fe4ae6 1495 if(sr.doLog()) {
e6a9dde5 1496 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1497 }
b25cae9a 1498
1499 pw.getHeader()->ad=0;
f3fe4ae6 1500 }
b25cae9a 1501 else if(state == Bogus) {
66f2e6ad
KM
1502 if(t_bogusremotes)
1503 t_bogusremotes->push_back(dc->d_source);
1504 if(t_bogusqueryring)
1505 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1506 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1507 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1508 }
1509
1510 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1511 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1512 if(sr.doLog()) {
e6a9dde5 1513 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1514 }
1515
1516 pw.getHeader()->rcode=RCode::ServFail;
1517 goto sendit;
1518 } else {
1519 if(sr.doLog()) {
e6a9dde5 1520 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1521 }
1522 }
1523 }
1524 }
1525 catch(ImmediateServFailException &e) {
1526 if(g_logCommonErrors)
e6a9dde5 1527 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1528 pw.getHeader()->rcode=RCode::ServFail;
1529 goto sendit;
f3fe4ae6 1530 }
b3f0ed10 1531 }
1532
c154c8a4 1533 if(ret.size()) {
92476c8b 1534 orderAndShuffle(ret);
5cc8371b 1535 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1536 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1537 variableAnswer=true;
1538 }
8e079f3a 1539 }
0afa32d4
RG
1540
1541 bool needCommit = false;
8e079f3a 1542 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1543 if( ! DNSSECOK &&
1544 ( i->d_type == QType::NSEC3 ||
1545 (
1546 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1547 (
1548 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1549 i->d_place != DNSResourceRecord::ANSWER
1550 )
1551 )
1552 )
1553 ) {
2e921ec6 1554 continue;
3e80ebce
KM
1555 }
1556
70fb28d9 1557 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1558 needCommit = false;
1559 break;
1560 }
1561 needCommit = true;
1562
41c542ec
NC
1563#ifdef NOD_ENABLED
1564 bool udr = false;
1565 if (g_udrEnabled) {
1566 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1567 if (!hasUDR && udr)
1568 hasUDR = true;
41c542ec
NC
1569 }
1570#endif /* NOD ENABLED */
1571
aa7929a3 1572#ifdef HAVE_PROTOBUF
b773359c 1573 if (t_protobufServers) {
41c542ec
NC
1574#ifdef NOD_ENABLED
1575 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1576#else
0bd2e252 1577 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1578#endif /* NOD_ENABLED */
aa7929a3
RG
1579 }
1580#endif
ea634573 1581 }
0afa32d4 1582 if(needCommit)
8e079f3a 1583 pw.commit();
288f4aa9 1584 }
10321a98 1585 sendit:;
b3f0ed10 1586
a0ddd130 1587 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
9837850d 1588 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
5a7f99b4 1589 EDNSSubnetOpts eo;
1590 eo.source = dc->d_ednssubnet.source;
1591 ComboAddress sa;
1ef18cab 1592 sa.reset();
5a7f99b4 1593 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1594 eo.scope = Netmask(sa, 0);
1595
1596 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1597 }
1598
97c6d7e5
RG
1599 if (haveEDNS) {
1600 /* we try to add the EDNS OPT RR even for truncated answers,
1601 as rfc6891 states:
1602 "The minimal response MUST be the DNS header, question section, and an
1603 OPT record. This MUST also occur when a truncated response (using
1604 the DNS header's TC bit) is returned."
1605 */
9b60fb71 1606 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1607 pw.commit();
97c6d7e5
RG
1608 }
1609
79332bff 1610 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1611 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1612#ifdef NOD_ENABLED
1613 bool nod = false;
1614 if (g_nodEnabled) {
1615 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1616 nod = true;
1617 }
1618#endif /* NOD_ENABLED */
aa7929a3 1619#ifdef HAVE_PROTOBUF
b773359c 1620 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1621 pbMessage->setBytes(packet.size());
1622 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1623 if (appliedPolicy.d_name) {
d362f7c1
RG
1624 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1625 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1626 }
d362f7c1 1627 pbMessage->setPolicyTags(dc->d_policyTags);
c29d820c
RG
1628 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1629 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1630 }
1631 else {
1632 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1633 }
d362f7c1
RG
1634 pbMessage->setRequestorId(dq.requestorId);
1635 pbMessage->setDeviceId(dq.deviceId);
0a6a45c8 1636 pbMessage->setDeviceName(dq.deviceName);
41c542ec
NC
1637#ifdef NOD_ENABLED
1638 if (g_nodEnabled) {
ca2526f5 1639 if (nod) {
41c542ec 1640 pbMessage->setNOD(true);
ca2526f5
NC
1641 pbMessage->addPolicyTag(g_nod_pbtag);
1642 }
1643 if (hasUDR) {
1644 pbMessage->addPolicyTag(g_udr_pbtag);
1645 }
41c542ec
NC
1646 }
1647#endif /* NOD_ENABLED */
b773359c 1648 protobufLogResponse(*pbMessage);
ac238ea7 1649#ifdef NOD_ENABLED
ca2526f5
NC
1650 if (g_nodEnabled) {
1651 pbMessage->setNOD(false);
1652 pbMessage->clearUDR();
1653 if (nod)
1654 pbMessage->removePolicyTag(g_nod_pbtag);
1655 if (hasUDR)
1656 pbMessage->removePolicyTag(g_udr_pbtag);
1657 }
ac238ea7 1658#endif /* NOD_ENABLED */
aa7929a3
RG
1659 }
1660#endif
ea634573 1661 if(!dc->d_tcp) {
b71b60ee 1662 struct msghdr msgh;
1663 struct iovec iov;
7bec330a
OM
1664 cmsgbuf_aligned cbuf;
1665 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1666 msgh.msg_control=NULL;
1667
cbc03320 1668 if(g_fromtosockets.count(dc->d_socket)) {
4272d071 1669 addCMsgSrcAddr(&msgh, &cbuf, &dc->d_local, 0);
2c0af54f 1670 }
cbc03320 1671 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1672 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1673
49dc532e 1674 if(variableAnswer || sr.wasVariable()) {
1ef18cab 1675 g_stats.variableResponses++;
49dc532e 1676 }
3762e821 1677 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1678 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1679 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1680 g_now.tv_sec,
76e2b9e3 1681 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1682 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1683 dq.validationState,
08b02366
RG
1684 dc->d_ecsBegin,
1685 dc->d_ecsEnd,
4b0bdd5f 1686 std::move(pbMessage));
1051f8a9 1687 }
3762e821 1688 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1689 }
9c495589
BH
1690 else {
1691 char buf[2];
ea634573
BH
1692 buf[0]=packet.size()/256;
1693 buf[1]=packet.size()%256;
feccc9fc 1694
c038218b 1695 Utility::iovec iov[2];
feccc9fc 1696
ea634573
BH
1697 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1698 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1699
dd079764 1700 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1701 bool hadError=true;
feccc9fc 1702
dd079764 1703 if(wret == 0)
e6a9dde5 1704 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1705 else if(wret < 0 )
e6a9dde5 1706 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1707 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1708 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1709 else
18af64a8 1710 hadError=false;
3ddb9247 1711
09e6702a 1712 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1713
09e6702a 1714 if(hadError) {
18af64a8 1715 // no need to remove us from FDM, we weren't there
c36bc97a 1716 dc->d_socket = -1;
09e6702a 1717 }
a6ae6414 1718 else {
fde296a3
RG
1719 dc->d_tcpConnection->queriesCount++;
1720 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1721 dc->d_socket = -1;
1722 }
1723 else {
1724 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1725 Utility::gettimeofday(&g_now, 0); // needs to be updated
27ae2e3c
RG
1726 struct timeval ttd = g_now;
1727 ttd.tv_sec += g_tcpTimeout;
1728
1729 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
fde296a3 1730 }
0e9d9ce2 1731 }
9c495589 1732 }
2c9119cd 1733 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1734 if(!g_quiet) {
e6a9dde5
PL
1735 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1736 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1737 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1738 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1739
1740 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1741 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1742 }
1743
e6a9dde5 1744 g_log<<endl;
2c9119cd 1745
c75a6a9e 1746 }
b23b8614 1747
f7b8cffa
RG
1748 if (sr.d_outqueries || sr.d_authzonequeries) {
1749 t_RC->cacheMisses++;
1750 }
1751 else {
1752 t_RC->cacheHits++;
1753 }
2c9119cd 1754
fe213470
BH
1755 if(spent < 0.001)
1756 g_stats.answers0_1++;
1757 else if(spent < 0.010)
1758 g_stats.answers1_10++;
1759 else if(spent < 0.1)
1760 g_stats.answers10_100++;
1761 else if(spent < 1.0)
1762 g_stats.answers100_1000++;
1763 else
1764 g_stats.answersSlow++;
1765
574af7ea 1766 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1767 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1768 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1769 // no worries, we do this for packet cache hits elsewhere
19178da9 1770
1771 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1772 if(ourtime < 1)
1773 g_stats.ourtime0_1++;
1774 else if(ourtime < 2)
1775 g_stats.ourtime1_2++;
1776 else if(ourtime < 4)
1777 g_stats.ourtime2_4++;
1778 else if(ourtime < 8)
1779 g_stats.ourtime4_8++;
1780 else if(ourtime < 16)
1781 g_stats.ourtime8_16++;
1782 else if(ourtime < 32)
1783 g_stats.ourtime16_32++;
1784 else {
1785 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1786 g_stats.ourtimeSlow++;
1787 }
042da1a1 1788 if(ourtime >= 0.0) {
1789 newLat=ourtime*1000; // usec
1790 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1791 }
c6d04bdc 1792 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1793 }
3f81d239 1794 catch(PDNSException &ae) {
e6a9dde5 1795 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1796 }
16ce7f18
JS
1797 catch(const MOADNSException &mde) {
1798 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1799 }
fdbf35ac 1800 catch(std::exception& e) {
e6a9dde5 1801 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1802
1803 // Luawrapper nests the exception from Lua, so we unnest it here
1804 try {
1805 std::rethrow_if_nested(e);
2010ac95 1806 } catch(const std::exception& ne) {
e6a9dde5 1807 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1808 } catch(...) {}
1809
e6a9dde5 1810 g_log<<endl;
c154c8a4 1811 }
288f4aa9 1812 catch(...) {
e6a9dde5 1813 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1814 }
3ddb9247 1815
ec6eacbc 1816 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1817}
1818
d187038c 1819static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1820{
2d733c0f 1821 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1822 if(processNum >= 0)
335da0ba 1823 sockname += "."+std::to_string(processNum);
677e2a46 1824 sockname+=".controlsocket";
41f7a068 1825 s_rcc.listen(sockname);
3ddb9247 1826
387de317
BH
1827 int sockowner = -1;
1828 int sockgroup = -1;
1829
1830 if (!::arg().isEmpty("socket-group"))
1831 sockgroup=::arg().asGid("socket-group");
1832 if (!::arg().isEmpty("socket-owner"))
1833 sockowner=::arg().asUid("socket-owner");
3ddb9247 1834
f838ad8d
BH
1835 if (sockgroup > -1 || sockowner > -1) {
1836 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1837 unixDie("Failed to chown control socket");
1838 }
1839 }
387de317
BH
1840
1841 // do mode change if socket-mode is given
1842 if(!::arg().isEmpty("socket-mode")) {
1843 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1844 if(chmod(sockname.c_str(), sockmode) < 0) {
1845 unixDie("Failed to chmod control socket");
1846 }
387de317 1847 }
1d5b3ce6
BH
1848}
1849
/* Parse the question of a raw DNS query and look for EDNS Client Subnet (ECS)
   and XPF information in the records that follow it.

   question   - the raw query packet
   dnsname, qtype, qclass
              - outputs, filled from the DNSName constructor call below
   foundECS   - set to true once an ECS option has been parsed into *ednssubnet;
                it is also read by the loop condition, so the caller must initialize it
   ednssubnet - output for the ECS data; when nullptr, ECS is not looked for at all
   options    - when non-null, all options of the OPT record are parsed into *options
                with getEDNSOptions() and ECS is taken from there; when nullptr only
                the ECS option is searched for with getEDNSOption()
   foundXPF   - receives the result of parseXPFPayload(); also read by the loop condition
   xpfSource  - output for XPF; XPF is only looked for when this is non-null and
                g_xpfRRCode is non-zero
   xpfDest    - passed through to parseXPFPayload()

   The packet is indexed with question.at(), which is bounds-checked and throws on
   an out-of-range position; the callers visible in this file wrap the call in a
   try/catch for std::exception. */
5cc8371b 1850static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1851 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1852 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1853{
59cb4a79 1854 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1855 const bool lookForECS = ednssubnet != nullptr;
1856 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1857 size_t questionLen = question.length();
1858 unsigned int consumed=0;
1859 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1860
// Start scanning right after the question: header + `consumed` bytes + 4
// (presumably the encoded qname followed by qtype and qclass - confirm against DNSName).
1861 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1862 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1863 const uint16_t arcount = ntohs(dh->arcount);
1864
// Walk at most `arcount` records, stopping early once everything we were asked
// to look for has been found or there is no room left for another record header.
1865 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1866 if (question.at(pos) != 0) {
1867 /* not an OPT or a XPF, bye. */
1868 return;
1869 }
1870
1871 pos += 1;
1872 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1873 pos += sizeof(dnsrecordheader);
1874
// pos now points just past the record header, i.e. at the record data.
1875 if (pos >= questionLen) {
1876 return;
1877 }
1878
02b47f43 1879 /* OPT root label (1) followed by type (2) */
5cc8371b 1880 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1881 if (!options) {
1882 char* ecsStart = nullptr;
1883 size_t ecsLen = 0;
5cc8371b
RG
1884 /* we need to pass the record len */
1885 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
// The first 4 bytes at ecsStart are skipped before parsing
// (presumably the option code and option length - confirm against getEDNSOption).
1886 if (res == 0 && ecsLen > 4) {
1887 EDNSSubnetOpts eso;
1888 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1889 *ednssubnet=eso;
5cc8371b 1890 foundECS = true;
00b8cadc
RG
1891 }
1892 }
1893 }
1894 else {
5cc8371b
RG
1895 /* we need to pass the record len */
1896 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1897 if (res == 0) {
// Only the first ECS value found in the option map is considered.
1898 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1899 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1900 EDNSSubnetOpts eso;
29e6303a 1901 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1902 *ednssubnet=eso;
5cc8371b 1903 foundECS = true;
00b8cadc
RG
1904 }
1905 }
02b47f43
RG
1906 }
1907 }
1908 }
// An XPF record is only accepted with the configured type code, class IN and a TTL of 0.
59cb4a79 1909 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
// Make sure the announced record data length fits in what is left of the packet.
1910 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1911 return;
1912 }
1913
1914 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1915 }
1916
// Skip over this record's data to reach the next record.
1917 pos += ntohs(drh->d_clen);
02b47f43
RG
1918 }
1919}
1920
/* Read callback for an established TCP connection; `var` carries the
   shared_ptr<TCPConnection> that was registered together with the fd.

   The connection goes through three states:
     BYTE0       - nothing of the 2-byte, big-endian query length has been read yet
     BYTE1       - the first length byte is in data[0], the second is still missing
     GETQUESTION - the length (qlen) is known and the query body is being read

   Once qlen bytes have been read the fd is removed from the multiplexer, a
   DNSComboWriter is built from the received bytes, the optional gettag hooks,
   protobuf query logging and the ipfilter hook are run, and the query is handed
   to startDoResolve in a new MThread.

   On a read error or EOF in any state the fd is removed from the multiplexer
   and the function returns. */
d187038c 1921static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1922{
cd989c87 1923 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1924
879b3f70 1925 if(conn->state==TCPConnection::BYTE0) {
// Try to read both length bytes at once; a single byte moves us to BYTE1.
2749c3fe 1926 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1927 if(bytes==1)
667f7e60 1928 conn->state=TCPConnection::BYTE1;
3ddb9247 1929 if(bytes==2) {
a0aa4f64 1930 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1931 conn->data.resize(conn->qlen);
667f7e60
BH
1932 conn->bytesread=0;
1933 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1934 }
1935 if(!bytes || bytes < 0) {
bb4bdbaf 1936 t_fdm->removeReadFD(fd);
09e6702a
BH
1937 return;
1938 }
1939 }
667f7e60 1940 else if(conn->state==TCPConnection::BYTE1) {
// Read the second length byte into data[1], next to the one already stored.
2749c3fe 1941 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1942 if(bytes==1) {
667f7e60 1943 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1944 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1945 conn->data.resize(conn->qlen);
667f7e60 1946 conn->bytesread=0;
09e6702a
BH
1947 }
1948 if(!bytes || bytes < 0) {
1949 if(g_logCommonErrors)
e6a9dde5 1950 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1951 t_fdm->removeReadFD(fd);
09e6702a
BH
1952 return;
1953 }
1954 }
667f7e60 1955 else if(conn->state==TCPConnection::GETQUESTION) {
// Read as much of the remaining query body as is available right now.
2749c3fe 1956 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
// The upper bound keeps the uint16_t cast below from truncating.
f9d67b41 1957 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1958 if(g_logCommonErrors) {
1959 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1960 }
bb4bdbaf 1961 t_fdm->removeReadFD(fd);
09e6702a
BH
1962 return;
1963 }
b841314c 1964 conn->bytesread+=(uint16_t)bytes;
667f7e60 1965 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1966 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1967
// dc is owned here until it is released to the MThread at the end; every
// early return below therefore destroys it.
9a864da4 1968 std::unique_ptr<DNSComboWriter> dc;
09e6702a 1969 try {
9a864da4 1970 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 1971 }
16ce7f18 1972 catch(const MOADNSException &mde) {
3ddb9247 1973 g_stats.clientParseError++;
4957a608 1974 if(g_logCommonErrors)
e6a9dde5 1975 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1976 return;
09e6702a 1977 }
cd989c87
BH
1978 dc->d_tcpConnection = conn; // carry the torch
1979 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1980 dc->d_tcp=true;
5cc8371b
RG
// Source starts out equal to the remote peer; it is passed to getQNameAndSubnet
// below as the XPF output when needXPF is set.
1981 dc->setRemote(conn->d_remote);
1982 dc->setSource(conn->d_remote);
a6147cd2 1983 ComboAddress dest;
d38e2ba9 1984 dest.reset();
a6147cd2 1985 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1986 socklen_t len = dest.getSocklen();
1987 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1988 dc->setLocal(dest);
5cc8371b 1989 dc->setDestination(dest);
33dcceba
RG
1990 DNSName qname;
1991 uint16_t qtype=0;
1992 uint16_t qclass=0;
1993 bool needECS = false;
5cc8371b 1994 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1995 string requestorId;
590388d2 1996 string deviceId;
0a6a45c8 1997 string deviceName;
16bbc6e3 1998 bool logQuery = false;
aa7929a3 1999#ifdef HAVE_PROTOBUF
02b47f43 2000 auto luaconfsLocal = g_luaconfs.getLocal();
// ECS parsing is requested whenever checkProtobufExport() reports true.
63341e8d 2001 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
2002 needECS = true;
2003 }
b773359c 2004 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
b9fa43e0
OM
2005#endif /* HAVE_PROTOBUF */
2006
2007#ifdef HAVE_FSTRM
2008 checkFrameStreamExport(luaconfsLocal);
33dcceba
RG
2009#endif
2010
// Only parse the question up front when something needs it: ECS, XPF or a Lua gettag hook.
70fb28d9 2011 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
2012
2013 try {
29e6303a 2014 EDNSOptionViewMap ednsOptions;
5cc8371b 2015 bool xpfFound = false;
b40562da 2016 dc->d_ecsParsed = true;
5cc8371b 2017 dc->d_ecsFound = false;
2749c3fe 2018 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
2019 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2020 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 2021
70fb28d9 2022 if(t_pdl) {
33dcceba 2023 try {
// The FFI variant of gettag takes precedence when both hooks are defined.
70fb28d9 2024 if (t_pdl->d_gettag_ffi) {
0a6a45c8 2025 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
2026 }
2027 else if (t_pdl->d_gettag) {
0a6a45c8 2028 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName);
70fb28d9 2029 }
33dcceba 2030 }
70fb28d9 2031 catch(const std::exception& e) {
33dcceba 2032 if(g_logCommonErrors)
e6a9dde5 2033 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2034 }
2035 }
2036 }
70fb28d9 2037 catch(const std::exception& e)
33dcceba
RG
2038 {
2039 if(g_logCommonErrors)
e6a9dde5 2040 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2041 }
2042 }
f52177c3
RG
2043
2044 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2045
33dcceba 2046#ifdef HAVE_PROTOBUF
b773359c 2047 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 2048 dc->d_requestorId = requestorId;
590388d2 2049 dc->d_deviceId = deviceId;
0a6a45c8 2050 dc->d_deviceName = deviceName;
d61aa945 2051 dc->d_uuid = getUniqueID();
4898a348 2052 }
02b47f43 2053
b773359c 2054 if(t_protobufServers) {
02b47f43 2055 try {
02b47f43 2056
// With taggedOnly set, queries that carry no policy tags are not logged.
845cbf4c 2057 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
0a6a45c8 2058 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId, dc->d_deviceName);
b790ef3d 2059 }
02b47f43
RG
2060 }
2061 catch(std::exception& e) {
2062 if(g_logCommonErrors)
e6a9dde5 2063 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
2064 }
2065 }
aa7929a3 2066#endif
5034517a
RG
// Give the Lua ipfilter hook a chance to drop the query before any work is done.
2067 if(t_pdl) {
2068 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2069 if(!g_quiet)
2070 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2071 g_stats.policyDrops++;
2072 return;
2073 }
2074 }
2075
// Responses (QR bit set), non-zero opcodes and queries without a question are ignored.
879b3f70 2076 if(dc->d_mdp.d_header.qr) {
048f5db6 2077 g_stats.ignoredCount++;
c0f9be19
RG
2078 if(g_logCommonErrors) {
2079 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2080 }
4957a608 2081 return;
879b3f70 2082 }
3abcdab2 2083 if(dc->d_mdp.d_header.opcode) {
048f5db6 2084 g_stats.ignoredCount++;
c0f9be19
RG
2085 if(g_logCommonErrors) {
2086 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2087 }
c0f9be19
RG
2088 return;
2089 }
2090 else if (dh->qdcount == 0) {
2091 g_stats.emptyQueriesCount++;
2092 if(g_logCommonErrors) {
2093 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2094 }
3abcdab2
PD
2095 return;
2096 }
09e6702a 2097 else {
4957a608
BH
2098 ++g_stats.qcounter;
2099 ++g_stats.tcpqcounter;
9a864da4 2100 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
4957a608 2101 return;
09e6702a
BH
2102 }
2103 }
2104 }
2105}
2106
6dcd28c3 2107//! Handle new incoming TCP connection
d187038c 2108static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2109{
37d3f960 2110 ComboAddress addr;
09e6702a 2111 socklen_t addrlen=sizeof(addr);
a683e8bd 2112 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2113 if(newsock>=0) {
85c32340
BH
2114 if(MT->numProcesses() > g_maxMThreads) {
2115 g_stats.overCapacityDrops++;
a7b68ae7
RG
2116 try {
2117 closesocket(newsock);
2118 }
2119 catch(const PDNSException& e) {
e6a9dde5 2120 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2121 }
85c32340
BH
2122 return;
2123 }
2124
92011b8f 2125 if(t_remotes)
2126 t_remotes->push_back(addr);
49a699c4 2127 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 2128 if(!g_quiet)
e6a9dde5 2129 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2130
09e6702a 2131 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2132 try {
2133 closesocket(newsock);
2134 }
2135 catch(const PDNSException& e) {
e6a9dde5 2136 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2137 }
09e6702a
BH
2138 return;
2139 }
bd0289fc 2140 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2141 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2142 try {
2143 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2144 }
2145 catch(const PDNSException& e) {
e6a9dde5 2146 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2147 }
09e6702a
BH
2148 return;
2149 }
3ddb9247 2150
3897b9e1 2151 setNonBlocking(newsock);
f26bf547 2152 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 2153 tc->state=TCPConnection::BYTE0;
3ddb9247 2154
27ae2e3c
RG
2155 struct timeval ttd;
2156 Utility::gettimeofday(&ttd, 0);
2157 ttd.tv_sec += g_tcpTimeout;
c038218b 2158
27ae2e3c 2159 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
09e6702a
BH
2160 }
2161}
3ddb9247 2162
/* Process a single UDP query.

   question - the raw query packet
   fromaddr - the address the packet was received from
   destaddr - the local address the packet was sent to
   tv       - receive timestamp of the packet; when tv.tv_sec is non-zero, queries
              that are more than 1000 ms older than the current time are dropped
   fd       - the socket the query came in on, also used to send a cached answer

   Steps: optionally parse the question (ECS / XPF / Lua gettag hooks), optionally
   log the query over protobuf, try to answer from the packet cache, and otherwise
   run the ipfilter hook and the capacity check before handing a DNSComboWriter to
   startDoResolve in a new MThread.

   Every return statement in this function returns a null pointer. */
d187038c 2163static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 2164{
183eb877 2165 gettimeofday(&g_now, 0);
c29d820c
RG
2166 if (tv.tv_sec) {
2167 struct timeval diff = g_now - tv;
// delta is expressed in milliseconds
2168 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2169
c29d820c
RG
2170 if(delta > 1000.0) {
2171 g_stats.tooOldDrops++;
2172 return nullptr;
2173 }
b71b60ee 2174 }
2175
1bc3c142 2176 ++g_stats.qcounter;
d7f10541
BH
2177 if(fromaddr.sin4.sin_family==AF_INET6)
2178 g_stats.ipv6qcounter++;
1bc3c142
BH
2179
2180 string response;
93f0da94 2181 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2182 unsigned int ctag=0;
f57486f1 2183 uint32_t qhash = 0;
12aff2e5 2184 bool needECS = false;
5cc8371b 2185 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 2186 std::vector<std::string> policyTags;
5fd2577f 2187 LuaContext::LuaObject data;
5cc8371b
RG
// source / destination start out as the socket-level addresses; they are passed
// to getQNameAndSubnet below as the XPF outputs when needXPF is set.
2188 ComboAddress source = fromaddr;
2189 ComboAddress destination = destaddr;
67e31ebe 2190 string requestorId;
590388d2 2191 string deviceId;
0a6a45c8 2192 string deviceName;
16bbc6e3 2193 bool logQuery = false;
12aff2e5 2194#ifdef HAVE_PROTOBUF
02b47f43 2195 boost::uuids::uuid uniqueId;
02b47f43 2196 auto luaconfsLocal = g_luaconfs.getLocal();
// A unique ID is generated when either export check succeeds; ECS parsing is
// only requested when checkProtobufExport() does.
63341e8d 2197 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2198 uniqueId = getUniqueID();
02b47f43 2199 needECS = true;
63341e8d 2200 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2201 uniqueId = getUniqueID();
02b47f43 2202 }
b773359c
RG
2203 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2204 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2205#endif
2206#ifdef HAVE_FSTRM
2207 checkFrameStreamExport(luaconfsLocal);
12aff2e5 2208#endif
b40562da
RG
2209 EDNSSubnetOpts ednssubnet;
2210 bool ecsFound = false;
2211 bool ecsParsed = false;
08b02366
RG
2212 uint16_t ecsBegin = 0;
2213 uint16_t ecsEnd = 0;
70fb28d9
RG
2214 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2215 bool variable = false;
1bc3c142 2216 try {
02b47f43
RG
2217 DNSName qname;
2218 uint16_t qtype=0;
2219 uint16_t qclass=0;
1bc3c142 2220 uint32_t age;
c15ff3df 2221 bool qnameParsed=false;
8f7473d7 2222#ifdef MALLOC_TRACE
2223 /*
2224 static uint64_t last=0;
2225 if(!last)
2226 g_mtracer->clearAllocators();
2227 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2228 last=g_mtracer->getAllocs();
2229 cout<<g_mtracer->topAllocatorsString()<<endl;
2230 g_mtracer->clearAllocators();
2231 */
2232#endif
55a1378f 2233
// Only parse the question up front when something needs it: ECS, XPF or a Lua gettag hook.
70fb28d9 2234 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2235 try {
29e6303a 2236 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2237 bool xpfFound = false;
2238
2239 ecsFound = false;
2240
2241 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2242 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2243 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2244
// Only reached when getQNameAndSubnet did not throw.
c15ff3df
RG
2245 qnameParsed = true;
2246 ecsParsed = true;
12aff2e5 2247
70fb28d9 2248 if(t_pdl) {
12aff2e5 2249 try {
// The FFI variant of gettag takes precedence when both hooks are defined.
70fb28d9 2250 if (t_pdl->d_gettag_ffi) {
0a6a45c8 2251 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName, ttlCap, variable, logQuery);
70fb28d9
RG
2252 }
2253 else if (t_pdl->d_gettag) {
0a6a45c8 2254 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName);
70fb28d9 2255 }
12aff2e5 2256 }
70fb28d9 2257 catch(const std::exception& e) {
12aff2e5 2258 if(g_logCommonErrors)
e6a9dde5 2259 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2260 }
8ea8c302 2261 }
b2eacd67 2262 }
70fb28d9 2263 catch(const std::exception& e)
b2eacd67 2264 {
2265 if(g_logCommonErrors)
e6a9dde5 2266 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2267 }
12ce523e 2268 }
3ddb9247 2269
02b47f43 2270 bool cacheHit = false;
1fbc6dc5 2271 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2272#ifdef HAVE_PROTOBUF
b773359c 2273 if (t_protobufServers) {
// The response message is created before the cache lookup, which is handed a
// pointer to it below.
d362f7c1 2274 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2275 pbMessage->setServerIdentity(SyncRes::s_serverID);
// With taggedOnly set, queries that carry no policy tags are not logged.
845cbf4c 2276 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
0a6a45c8 2277 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName);
b790ef3d 2278 }
d9d3f9c1
RG
2279 }
2280#endif /* HAVE_PROTOBUF */
02b47f43 2281
70fb28d9
RG
2282 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2283 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2284 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2285 vState valState;
// Two lookup variants: when the question was already parsed above, qtype and
// qclass are passed by value; otherwise they are passed by pointer
// (presumably so the cache can fill them in along with qname - confirm).
c15ff3df 2286 if (qnameParsed) {
08b02366 2287 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2288 }
2289 else {
08b02366 2290 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2291 }
2292
d9d3f9c1 2293 if (cacheHit) {
8467ec26
KM
// Keep track of who asks for names whose cached answer is marked Bogus.
2294 if(valState == Bogus) {
2295 if(t_bogusremotes)
2296 t_bogusremotes->push_back(source);
2297 if(t_bogusqueryring)
2298 t_bogusqueryring->push_back(make_pair(qname, qtype));
2299 }
2300
d9d3f9c1 2301#ifdef HAVE_PROTOBUF
b773359c 2302 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
// The requestor address is masked with the configured v4 / v6 mask before being logged.
5cc8371b 2303 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2304 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2305 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2306 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
c29d820c
RG
2307 if (g_useKernelTimestamp && tv.tv_sec) {
2308 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2309 }
2310 else {
2311 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2312 }
d362f7c1
RG
2313 pbMessage->setRequestorId(requestorId);
2314 pbMessage->setDeviceId(deviceId);
0a6a45c8 2315 pbMessage->setDeviceName(deviceName);
b773359c 2316 protobufLogResponse(*pbMessage);
02b47f43 2317 }
d9d3f9c1 2318#endif /* HAVE_PROTOBUF */
49a3500d 2319 if(!g_quiet)
e6a9dde5 2320 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2321
1bc3c142
BH
2322 g_stats.packetCacheHits++;
2323 SyncRes::s_queries++;
2324 ageDNSPacket(response, age);
b71b60ee 2325 struct msghdr msgh;
2326 struct iovec iov;
7bec330a
OM
2327 cmsgbuf_aligned cbuf;
// The reply goes back to fromaddr (the socket-level peer), not to `source`.
2328 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2329 msgh.msg_control=NULL;
2330
// For sockets listed in g_fromtosockets, attach destaddr as the source address of the reply.
cbc03320 2331 if(g_fromtosockets.count(fd)) {
7bec330a 2332 addCMsgSrcAddr(&msgh, &cbuf, &destaddr, 0);
b71b60ee 2333 }
cbc03320 2334 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2335 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2336
97bee66d 2337 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
// Copy the header out of the response buffer before reading the rcode from it.
2338 struct dnsheader tmpdh;
2339 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2340 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2341 }
08f3f638 2342 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2343 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2344 return 0;
2345 }
3ddb9247 2346 }
1bc3c142 2347 catch(std::exception& e) {
e6a9dde5 2348 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2349 return 0;
2350 }
3ddb9247 2351
// Cache miss: give the Lua ipfilter hook a chance to drop the query.
f26bf547 2352 if(t_pdl) {
5cc8371b 2353 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2354 if(!g_quiet)
e6a9dde5 2355 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2356 g_stats.policyDrops++;
2357 return 0;
2358 }
2359 }
2360
// Refuse new work when more than g_maxMThreads MThreads are already running.
1bc3c142 2361 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2362 if(!g_quiet)
e6a9dde5 2363 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2364
1bc3c142
BH
2365 g_stats.overCapacityDrops++;
2366 return 0;
2367 }
3ddb9247 2368
// Bundle everything gathered so far into a DNSComboWriter for the resolving MThread.
9a864da4 2369 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
1bc3c142 2370 dc->setSocket(fd);
49a3500d 2371 dc->d_tag=ctag;
e9f63d47 2372 dc->d_qhash=qhash;
5cc8371b
RG
2373 dc->setRemote(fromaddr);
2374 dc->setSource(source);
b71b60ee 2375 dc->setLocal(destaddr);
5cc8371b 2376 dc->setDestination(destination);
1bc3c142 2377 dc->d_tcp=false;
b40562da
RG
2378 dc->d_ecsFound = ecsFound;
2379 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2380 dc->d_ecsBegin = ecsBegin;
2381 dc->d_ecsEnd = ecsEnd;
b40562da 2382 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2383 dc->d_ttlCap = ttlCap;
2384 dc->d_variable = variable;
aa7929a3 2385#ifdef HAVE_PROTOBUF
b773359c 2386 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2387 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2388 }
67e31ebe 2389 dc->d_requestorId = requestorId;
590388d2 2390 dc->d_deviceId = deviceId;
0a6a45c8 2391 dc->d_deviceName = deviceName;
c29d820c 2392 dc->d_kernelTimestamp = tv;
aa7929a3
RG
2393#endif
2394
9a864da4 2395 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2396 return 0;
3ddb9247
PD
2397}
2398
b71b60ee 2399
d187038c 2400static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2401{
a683e8bd 2402 ssize_t len;
12c2f2b9 2403 static const size_t maxIncomingQuerySize = 512;
04896b99 2404 static thread_local std::string data;
5db529f8 2405 ComboAddress fromaddr;
b71b60ee 2406 struct msghdr msgh;
2407 struct iovec iov;
7bec330a 2408 cmsgbuf_aligned cbuf;
390f1dab 2409 bool firstQuery = true;
b71b60ee 2410
c0a00acd
RG
2411 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2412 data.resize(maxIncomingQuerySize);
2413 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
7bec330a 2414 fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2415
c0a00acd 2416 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2417
c0a00acd 2418 firstQuery = false;
390f1dab 2419
c0a00acd
RG
2420 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2421 g_stats.ignoredCount++;
2422 if (!g_quiet) {
2423 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2424 }
2425 return;
04896b99 2426 }
04896b99 2427
c0a00acd
RG
2428 if (msgh.msg_flags & MSG_TRUNC) {
2429 g_stats.truncatedDrops++;
2430 if (!g_quiet) {
2431 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2432 }
2433 return;
ba892c7f 2434 }
b23b8614 2435
c0a00acd
RG
2436 if(t_remotes) {
2437 t_remotes->push_back(fromaddr);
2438 }
81859ba5 2439
c0a00acd
RG
2440 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2441 if(!g_quiet) {
2442 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2443 }
3ddb9247 2444
c0a00acd
RG
2445 g_stats.unauthorizedUDP++;
2446 return;
5db529f8 2447 }
c0a00acd
RG
2448 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2449 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2450 if(!g_quiet) {
2451 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2452 }
2453
2454 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2455 return;
3abcdab2 2456 }
c0a00acd
RG
2457
2458 try {
2459 data.resize(static_cast<size_t>(len));
2460 dnsheader* dh=(dnsheader*)&data[0];
2461
2462 if(dh->qr) {
2463 g_stats.ignoredCount++;
2464 if(g_logCommonErrors) {
2465 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2466 }
2467 }
2468 else if(dh->opcode) {
2469 g_stats.ignoredCount++;
2470 if(g_logCommonErrors) {
2471 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2472 }
a6147cd2 2473 }
c0f9be19
RG
2474 else if (dh->qdcount == 0) {
2475 g_stats.emptyQueriesCount++;
2476 if(g_logCommonErrors) {
2477 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2478 }
2479 }
a6147cd2 2480 else {
c0a00acd
RG
2481 struct timeval tv={0,0};
2482 HarvestTimestamp(&msgh, &tv);
2483 ComboAddress dest;
2484 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2485 auto loc = rplookup(g_listenSocketsAddresses, fd);
2486 if(HarvestDestinationAddress(&msgh, &dest)) {
2487 // but.. need to get port too
2488 if(loc) {
2489 dest.sin4.sin_port = loc->sin4.sin_port;
2490 }
a6147cd2 2491 }
2492 else {
c0a00acd
RG
2493 if(loc) {
2494 dest = *loc;
2495 }
2496 else {
2497 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2498 socklen_t slen = dest.getSocklen();
2499 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2500 }
2501 }
2502
2503 if(g_weDistributeQueries) {
2504 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2505 }
2506 else {
144040be 2507 ++s_threadInfos[t_id].numberOfDistributedQueries;
c0a00acd 2508 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2509 }
2510 }
c0a00acd 2511 }
16ce7f18 2512 catch(const MOADNSException &mde) {
c0a00acd
RG
2513 g_stats.clientParseError++;
2514 if(g_logCommonErrors) {
2515 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2516 }
2517 }
2518 catch(const std::runtime_error& e) {
2519 g_stats.clientParseError++;
2520 if(g_logCommonErrors) {
2521 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2522 }
5db529f8
BH
2523 }
2524 }
c0a00acd
RG
2525 else {
2526 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2527 if(firstQuery && errno == EAGAIN) {
2528 g_stats.noPacketError++;
2529 }
390f1dab 2530
c0a00acd
RG
2531 break;
2532 }
ac0e821b 2533 }
5db529f8
BH
2534}
2535
adb6cd72 2536static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2537{
37d3f960 2538 int fd;
f28307ad 2539 vector<string>locals;
2e3d8a19 2540 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2541
f28307ad 2542 if(locals.empty())
3f81d239 2543 throw PDNSException("No local address specified");
3ddb9247 2544
f28307ad 2545 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2546 ServiceTuple st;
2547 st.port=::arg().asNum("local-port");
2548 parseService(*i, st);
3ddb9247 2549
32252594
BH
2550 ComboAddress sin;
2551
d38e2ba9 2552 sin.reset();
37d3f960 2553 sin.sin4.sin_family = AF_INET;
32252594 2554 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2555 sin.sin6.sin6_family = AF_INET6;
f71bc087 2556 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2557 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2558 }
2559
2560 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2561 if(fd<0)
3f81d239 2562 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2563
3897b9e1 2564 setCloseOnExec(fd);
a903b39c 2565
f28307ad 2566 int tmp=1;
810ff705 2567 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2568 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2569 exit(1);
f28307ad 2570 }
0dfa94ab 2571 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2572 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2573 }
2574
c8ddb7c2 2575#ifdef TCP_DEFER_ACCEPT
38ac0821 2576 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2577 if(i==locals.begin())
377602e3 2578 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2579 }
2580#endif
2581
fec7dd5a
SS
2582 if( ::arg().mustDo("non-local-bind") )
2583 Utility::setBindAny(AF_INET, fd);
2584
2332f42d 2585#ifdef SO_REUSEPORT
810ff705
RG
2586 if(g_reusePort) {
2587 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2588 throw PDNSException("SO_REUSEPORT: "+stringerror());
2589 }
2590#endif
2591
0735b17e
RG
2592 if (::arg().asNum("tcp-fast-open") > 0) {
2593#ifdef TCP_FASTOPEN
2594 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2595 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2596 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2597 }
2598#else
e6a9dde5 2599 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2600#endif
2601 }
2602
32252594 2603 sin.sin4.sin_port = htons(st.port);
a683e8bd 2604 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2605 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2606 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2607
3897b9e1 2608 setNonBlocking(fd);
49a699c4 2609 setSocketSendBuffer(fd, 65000);
37d3f960 2610 listen(fd, 128);
b243ca3b 2611 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2612 tcpSockets.insert(fd);
2613
84433b79 2614 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2615 // - fd is not that which we know here, but returned from accept()
3ddb9247 2616 if(sin.sin4.sin_family == AF_INET)
377602e3 2617 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2618 else
377602e3 2619 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2620 }
9c495589
BH
2621}
2622
b243ca3b 2623static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2624{
fec7dd5a 2625 int one=1;
f28307ad 2626 vector<string>locals;
2e3d8a19 2627 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2628
f28307ad 2629 if(locals.empty())
3f81d239 2630 throw PDNSException("No local address specified");
3ddb9247 2631
f28307ad 2632 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2633 ServiceTuple st;
2634 st.port=::arg().asNum("local-port");
2635 parseService(*i, st);
2636
37d3f960 2637 ComboAddress sin;
996c89cc 2638
d38e2ba9 2639 sin.reset();
37d3f960 2640 sin.sin4.sin_family = AF_INET;
32252594 2641 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2642 sin.sin6.sin6_family = AF_INET6;
f71bc087 2643 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2644 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2645 }
3ddb9247 2646
bb4bdbaf 2647 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2648 if(fd < 0) {
3f81d239 2649 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2650 }
915b0c39 2651 if (!setSocketTimestamps(fd))
e6a9dde5 2652 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2653
b71b60ee 2654 if(IsAnyAddress(sin)) {
cbc03320 2655 if(sin.sin4.sin_family == AF_INET)
2656 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2657 g_fromtosockets.insert(fd);
757d3179 2658#ifdef IPV6_RECVPKTINFO
cbc03320 2659 if(sin.sin4.sin_family == AF_INET6)
2660 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2661 g_fromtosockets.insert(fd);
757d3179 2662#endif
0dfa94ab 2663 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2664 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2665 }
b71b60ee 2666 }
fec7dd5a
SS
2667 if( ::arg().mustDo("non-local-bind") )
2668 Utility::setBindAny(AF_INET6, fd);
2669
3897b9e1 2670 setCloseOnExec(fd);
a903b39c 2671
4e9a20e6 2672 setSocketReceiveBuffer(fd, 250000);
32252594 2673 sin.sin4.sin_port = htons(st.port);
37d3f960 2674
2332f42d 2675
2573d4a6 2676#ifdef SO_REUSEPORT
810ff705 2677 if(g_reusePort) {
2332f42d 2678 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2679 throw PDNSException("SO_REUSEPORT: "+stringerror());
2680 }
2681#endif
90f9fbc0
RG
2682
2683 if (sin.isIPv4()) {
2684 try {
2685 setSocketIgnorePMTU(fd);
2686 }
2687 catch(const std::exception& e) {
2688 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2689 }
2690 }
2691
2692 socklen_t socklen=sin.getSocklen();
3ddb9247 2693 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2694 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2695
3897b9e1 2696 setNonBlocking(fd);
c2136bf0 2697
b243ca3b 2698 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2699 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2700 if(sin.sin4.sin_family == AF_INET)
377602e3 2701 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2702 else
377602e3 2703 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2704 }
c836dc19 2705}
caa6eefa 2706
d187038c 2707static void daemonize(void)
c836dc19
BH
2708{
2709 if(fork())
2710 exit(0); // bye bye
3ddb9247
PD
2711
2712 setsid();
c836dc19 2713
27a5ead5 2714 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2715 if(i < 0)
e6a9dde5 2716 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2717 else {
2718 dup2(i,0); /* stdin */
2719 dup2(i,1); /* stderr */
2720 dup2(i,2); /* stderr */
2721 close(i);
2722 }
288f4aa9 2723}
caa6eefa 2724
d187038c 2725static void usr1Handler(int)
c75a6a9e
BH
2726{
2727 statsWanted=true;
2728}
ae1b2e98 2729
d187038c 2730static void usr2Handler(int)
9170fbaf 2731{
f1f34cc2 2732 g_quiet= !g_quiet;
2733 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2734 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2735}
2736
d187038c 2737static void doStats(void)
c75a6a9e 2738{
16beeaa4
BH
2739 static time_t lastOutputTime;
2740 static uint64_t lastQueryCount;
d299d4f5 2741
2742 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2743 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2744
d299d4f5 2745 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2746 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2747 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2748 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2749 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2750
e6a9dde5 2751 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2752 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2753 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2754 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2755 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2756 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2757 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2758 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2759
e6a9dde5 2760 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2761 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2762
e6a9dde5 2763 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2764 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2765
144040be
RG
2766 size_t idx = 0;
2767 for (const auto& threadInfo : s_threadInfos) {
2768 if(threadInfo.isWorker) {
ad9fc3dc 2769 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
144040be
RG
2770 ++idx;
2771 }
2772 }
2773
16beeaa4
BH
2774 time_t now = time(0);
2775 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2776 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2777 }
2778 lastOutputTime = now;
2779 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2780 }
3ddb9247 2781 else if(statsWanted)
e6a9dde5 2782 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2783
c75a6a9e
BH
2784 statsWanted=false;
2785}
c836dc19 2786
29f0b1ce 2787static void houseKeeping(void *)
c836dc19 2788{
e4ae55e5 2789 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
3337c2f7
RG
2790 static thread_local int cleanCounter=0;
2791 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
2792 auto luaconfsLocal = g_luaconfs.getLocal();
2793
2794 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2795 // Loading the Lua config file already "refreshed" the TAs
2796 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2797 }
2798
cc59bce6 2799 try {
6b0d90ea 2800 if(s_running) {
cc59bce6 2801 return;
6b0d90ea 2802 }
cc59bce6 2803 s_running=true;
3ddb9247 2804
cc59bce6 2805 struct timeval now;
2806 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2807
2808 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
a6f7f5fe 2809 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2810 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2811
a6f7f5fe 2812 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2813
cc59bce6 2814 if(!((cleanCounter++)%40)) { // this is a full scan!
2815 time_t limit=now.tv_sec-300;
a712cb56 2816 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2817 }
2818 last_prune=time(0);
d67620e4 2819 }
3ddb9247 2820
cc59bce6 2821 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2822 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2823 if (!res)
2824 last_rootupdate=now.tv_sec;
cc59bce6 2825 }
3ddb9247 2826
b243ca3b 2827 if(isHandlerThread()) {
3ddb9247 2828
cc59bce6 2829 if(now.tv_sec - last_secpoll >= 3600) {
2830 try {
2831 doSecPoll(&last_secpoll);
2832 }
581d4ea3 2833 catch(std::exception& e)
2834 {
e6a9dde5 2835 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2836 }
47e9b74f 2837 catch(PDNSException& e)
2838 {
e6a9dde5 2839 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2840 }
d0992a65
CH
2841 catch(ImmediateServFailException &e)
2842 {
e6a9dde5 2843 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2844 }
47e9b74f 2845 catch(...)
2846 {
e6a9dde5 2847 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2848 }
18b73338 2849 }
e4ae55e5
PL
2850
2851 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2852 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2853 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2854 try {
2855 map<DNSName, dsmap_t> dsAnchors;
2856 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2857 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2858 lci.dsAnchors = dsAnchors;
2859 });
2860 }
2861 last_trustAnchorUpdate = now.tv_sec;
2862 } catch (const PDNSException &pe) {
2863 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2864 }
2865 }
d67620e4 2866 }
6b0d90ea 2867 s_running=false;
d67620e4 2868 }
cc59bce6 2869 catch(PDNSException& ae)
2870 {
2871 s_running=false;
e6a9dde5 2872 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2873 throw;
2874 }
779828c4 2875}
d6d5dea7 2876
d187038c 2877static void makeThreadPipes()
49a699c4 2878{
ee271fc4
RG
2879 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
2880 if (pipeBufferSize > 0) {
2881 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
2882 }
2883
b243ca3b
RG
2884 /* thread 0 is the handler / SNMP, we start at 1 */
2885 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2886 auto& threadInfos = s_threadInfos.at(n);
2887
49a699c4
BH
2888 int fd[2];
2889 if(pipe(fd) < 0)
2890 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2891
b243ca3b
RG
2892 threadInfos.pipes.readToThread = fd[0];
2893 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2894
49a699c4
BH
2895 if(pipe(fd) < 0)
2896 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2897
2898 threadInfos.pipes.readFromThread = fd[0];
2899 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2900
cf8cda18
RG
2901 if(pipe(fd) < 0)
2902 unixDie("Creating pipe for inter-thread communications");
d10307c5 2903
b243ca3b
RG
2904 threadInfos.pipes.readQueriesToThread = fd[0];
2905 threadInfos.pipes.writeQueriesToThread = fd[1];
2906
ee271fc4
RG
2907 if (pipeBufferSize > 0) {
2908 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
2909 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(errno)<<endl;
2910 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
2911 if (existingSize > 0) {
2912 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
2913 }
2914 }
2915 }
2916
b243ca3b 2917 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2918 unixDie("Making pipe for inter-thread communications non-blocking");
2919 }
49a699c4
BH
2920 }
2921}
2922
00c9b8c1
BH
2923struct ThreadMSG
2924{
2925 pipefunc_t func;
2926 bool wantAnswer;
2927};
2928
b4e76a18 2929void broadcastFunction(const pipefunc_t& func)
49a699c4 2930{
b243ca3b
RG
2931 /* This function might be called by the worker with t_id 0 during startup
2932 for the initialization of ACLs and domain maps. After that it should only
2933 be called by the handler. */
d77abca1 2934
b243ca3b
RG
2935 if (s_threadInfos.empty() && isHandlerThread()) {
2936 /* the handler and distributors will call themselves below, but
2937 during startup we get called while s_threadInfos has not been
2938 populated yet to update the ACL or domain maps, so we need to
2939 handle that case.
2940 */
2941 func();
2942 }
b4e76a18 2943
b243ca3b
RG
2944 unsigned int n = 0;
2945 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2946 if(n++ == t_id) {
b4e76a18 2947 func(); // don't write to ourselves!
49a699c4
BH
2948 continue;
2949 }
3ddb9247 2950
00c9b8c1
BH
2951 ThreadMSG* tmsg = new ThreadMSG();
2952 tmsg->func = func;
2953 tmsg->wantAnswer = true;
b243ca3b 2954 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2955 delete tmsg;
b243ca3b 2956
49a699c4 2957 unixDie("write to thread pipe returned wrong size or error");
b841314c 2958 }
3ddb9247 2959
49467864 2960 string* resp = nullptr;
b243ca3b 2961 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2962 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2963
49a699c4 2964 if(resp) {
49a699c4 2965 delete resp;
49467864 2966 resp = nullptr;
49a699c4
BH
2967 }
2968 }
2969}
06ea9015 2970
592d7ade 2971static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 2972{
144040be 2973 auto& targetInfo = s_threadInfos[target];
b243ca3b
RG
2974 if(!targetInfo.isWorker) {
2975 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2976 exit(1);
00c9b8c1 2977 }
d77abca1 2978
b243ca3b 2979 const auto& tps = targetInfo.pipes;
3ddb9247 2980
cf8cda18
RG
2981 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2982 if (written > 0) {
2983 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2984 delete tmsg;
2985 unixDie("write to thread pipe returned wrong size or error");
2986 }
2987 }
2988 else {
2989 int error = errno;
cf8cda18 2990 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 2991 return false;
cf8cda18 2992 } else {
592d7ade 2993 delete tmsg;
17634427 2994 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2995 }
b841314c 2996 }
592d7ade 2997
144040be
RG
2998 ++targetInfo.numberOfDistributedQueries;
2999
592d7ade
RG
3000 return true;
3001}
3002
144040be
RG
3003static unsigned int getWorkerLoad(size_t workerIdx)
3004{
3005 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
3006 if (mt != nullptr) {
3007 return mt->numProcesses();
3008 }
3009 return 0;
3010}
3011
3012static unsigned int selectWorker(unsigned int hash)
3013{
3014 if (s_balancingFactor == 0) {
3015 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
3016 }
3017
3018 /* we start with one, representing the query we are currently handling */
3019 double currentLoad = 1;
3020 std::vector<unsigned int> load(g_numWorkerThreads);
3021 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3022 load[idx] = getWorkerLoad(idx);
3023 currentLoad += load[idx];
3024 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3025 }
3026
3027 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3028 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3029
3030 unsigned int worker = hash % g_numWorkerThreads;
1b9d2d46 3031 /* at least one server has to be at or below the average load */
596bf482
RG
3032 if (load[worker] > targetLoad) {
3033 ++g_stats.rebalancedQueries;
3034 do {
3035 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3036 worker = (worker + 1) % g_numWorkerThreads;
3037 }
3038 while(load[worker] > targetLoad);
144040be
RG
3039 }
3040
3041 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3042}
3043
592d7ade
RG
3044// This function is only called by the distributor threads, when pdns-distributes-queries is set
3045void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3046{
3047 if (!isDistributorThread()) {
3048 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3049 exit(1);
3050 }
3051
3052 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
144040be 3053 unsigned int target = selectWorker(hash);
592d7ade
RG
3054
3055 ThreadMSG* tmsg = new ThreadMSG();
3056 tmsg->func = func;
3057 tmsg->wantAnswer = false;
3058
3059 if (!trySendingQueryToWorker(target, tmsg)) {
3060 /* if this function failed but did not raise an exception, it means that the pipe
3061 was full, let's try another one */
3062 unsigned int newTarget = 0;
3063 do {
3064 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3065 } while (newTarget == target);
3066
3067 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3068 g_stats.queryPipeFullDrops++;
3069 delete tmsg;
3070 }
3071 }
00c9b8c1 3072}
3427fa8a 3073
d187038c 3074static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 3075{
f26bf547 3076 ThreadMSG* tmsg = nullptr;
3ddb9247 3077
cf8cda18 3078 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
3079 unixDie("read from thread pipe returned wrong size or error");
3080 }
3ddb9247 3081
2f22827a 3082 void *resp=0;
3083 try {
3084 resp = tmsg->func();
3085 }
3086 catch(std::exception& e) {
6d2010a8 3087 if(g_logCommonErrors)
e6a9dde5 3088 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3089 }
3090 catch(PDNSException& e) {
6d2010a8 3091 if(g_logCommonErrors)
e6a9dde5 3092 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3093 }
d7c676a5 3094 if(tmsg->wantAnswer) {
b243ca3b
RG
3095 const auto& threadInfo = s_threadInfos.at(t_id);
3096 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 3097 delete tmsg;
00c9b8c1 3098 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
3099 }
3100 }
3ddb9247 3101
00c9b8c1 3102 delete tmsg;
49a699c4 3103}
09e6702a 3104
13034931
BH
3105template<class T> void *voider(const boost::function<T*()>& func)
3106{
3107 return func();
3108}
3109
b3b5459d
BH
3110vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3111{
3112 a.insert(a.end(), b.begin(), b.end());
3113 return a;
3114}
3115
/* Appends every (string, number) pair of `b` to `a` and returns `a`. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >& a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
3121
3ddb9247
PD
3122vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3123{
3124 a.insert(a.end(), b.begin(), b.end());
3125 return a;
3126}
3127
92011b8f 3128
387b9ca6
RG
3129/*
3130 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
3131 reload the Lua script (not the Lua config) or change the current trace regex,
3132 and by the SNMP thread to gather metrics. */
b4e76a18 3133template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 3134{
b243ca3b 3135 if (!isHandlerThread()) {
788eeb4c 3136 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 3137 exit(1);
d77abca1
RG
3138 }
3139
b243ca3b 3140 unsigned int n = 0;
3427fa8a 3141 T ret=T();
b243ca3b
RG
3142 for (const auto& threadInfo : s_threadInfos) {
3143 if (n++ == t_id) {
3144 continue;
3145 }
3146
3147 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
3148 ThreadMSG* tmsg = new ThreadMSG();
3149 tmsg->func = boost::bind(voider<T>, func);
3150 tmsg->wantAnswer = true;
3ddb9247 3151
b841314c
RG
3152 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3153 delete tmsg;
3427fa8a 3154 unixDie("write to thread pipe returned wrong size or error");
b841314c 3155 }
3ddb9247 3156
49467864 3157 T* resp = nullptr;
3427fa8a
BH
3158 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3159 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3160
3427fa8a 3161 if(resp) {
3427fa8a
BH
3162 ret += *resp;
3163 delete resp;
49467864 3164 resp = nullptr;
3427fa8a
BH
3165 }
3166 }
3167 return ret;
3168}
3169
b4e76a18
RG
3170template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3171template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3172template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3173template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
5ac6d761 3174template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3427fa8a 3175
d187038c 3176static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3177{
fbfc1809
RG
3178 try {
3179 string remote;
3180 string msg=s_rcc.recv(&remote);
3181 RecursorControlParser rcp;
3182 RecursorControlParser::func_t* command;
3ddb9247 3183
fbfc1809 3184 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0 3185
fbfc1809
RG
3186 // If we are inside a chroot, we need to strip
3187 if (!arg()["chroot"].empty()) {
3188 size_t len = arg()["chroot"].length();
3189 remote = remote.substr(len);
3190 }
f0f3f0b0 3191
ab5c053d
BH
3192 s_rcc.send(answer, &remote);
3193 command();
3194 }
fbfc1809 3195 catch(const std::exception& e) {
e6a9dde5 3196 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3197 }
fbfc1809 3198 catch(const PDNSException& ae) {
e6a9dde5 3199 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3200 }
09e6702a
BH
3201}
3202
d187038c 3203static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3204{
0b18b22e 3205 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 3206 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3207
667f7e60 3208 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3209
a683e8bd 3210 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3211 if(ret > 0) {
667f7e60 3212 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3213 pident->inNeeded-=(size_t)ret;
825fa717 3214 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3215 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3216 PacketID pid=*pident;
3217 string msg=pident->inMSG;
3ddb9247 3218
bb4bdbaf 3219 t_fdm->removeReadFD(fd);
3ddb9247 3220 MT->sendEvent(pid, &msg);
09e6702a
BH
3221 }
3222 else {
667f7e60 3223 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3224 }
3225 }
3226 else {
667f7e60 3227 PacketID tmp=*pident;
bb4bdbaf 3228 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3229 string empty;
3230 MT->sendEvent(tmp, &empty); // this conveys error status
3231 }
3232}
3233
d187038c 3234static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3235{
0b18b22e 3236 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3237 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3238 if(ret > 0) {
a683e8bd 3239 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3240 if(pid->outPos==pid->outMSG.size()) {
3241 PacketID tmp=*pid;
bb4bdbaf 3242 t_fdm->removeWriteFD(fd);
09e6702a
BH
3243 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3244 }
3245 }
3246 else { // error or EOF
667f7e60 3247 PacketID tmp(*pid);
bb4bdbaf 3248 t_fdm->removeWriteFD(fd);
09e6702a 3249 string sent;
998a4334 3250 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3251 }
3252}
3253
34801ab1 3254// resend event to everybody chained onto it
d187038c 3255static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3256{
3257 if(iter->key.chain.empty())
3258 return;
e27e91a8 3259 // cerr<<"doResends called!\n";
34801ab1
BH
3260 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3261 resend.fd=-1;
3262 resend.id=*i;
e27e91a8 3263 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3264
34801ab1
BH
3265 MT->sendEvent(resend, &content);
3266 g_stats.chainResends++;
34801ab1
BH
3267 }
3268}
3269
d187038c 3270static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3271{
600fc20b 3272 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3273 ssize_t len;
fae8fe07
RG
3274 std::string packet;
3275 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3276 ComboAddress fromaddr;
09e6702a
BH
3277 socklen_t addrlen=sizeof(fromaddr);
3278
fae8fe07 3279 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3280
a683e8bd 3281 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3282 if(len < 0)
996c89cc 3283 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3284 else {
3ddb9247 3285 g_stats.serverParseError++;
09e6702a 3286 if(g_logCommonErrors)
e6a9dde5 3287 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3288 ": packet smaller than DNS header"<<endl;
998a4334 3289 }
34801ab1 3290
49a699c4 3291 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3292 string empty;
3293
3294 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3295 if(iter != MT->d_waiters.end())
34801ab1 3296 doResends(iter, pid, empty);
3ddb9247 3297
34801ab1 3298 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3299 return;
3ddb9247 3300 }
998a4334 3301
fae8fe07 3302 packet.resize(len);
998a4334 3303 dnsheader dh;
fae8fe07 3304 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3305
6da3b3ad
PD
3306 PacketID pident;
3307 pident.remote=fromaddr;
3308 pident.id=dh.id;
3309 pident.fd=fd;
34801ab1 3310
33a928af 3311 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3312 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3313 }
3314
3315 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3316 !dh.qr) { // one weird server
3317 pident.domain.clear();
3318 pident.type = 0;
3319 }
3320 else {
3321 try {
0b31e67e 3322 if(len > 12)
fae8fe07 3323 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3324 }
3325 catch(std::exception& e) {
3326 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3327 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3328 return;
34801ab1 3329 }
6da3b3ad 3330 }
34801ab1 3331
6da3b3ad
PD
3332 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3333 if(iter != MT->d_waiters.end()) {
3334 doResends(iter, pident, packet);
3335 }
c1da7976 3336
6da3b3ad 3337retryWithName:
4957a608 3338
6da3b3ad 3339 if(!MT->sendEvent(pident, &packet)) {
9ec48f21
RG
3340 /* we did not find a match for this response, something is wrong */
3341
6da3b3ad
PD
3342 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3343 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3344 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3345 pident.domain == mthread->key.domain) {
6da3b3ad 3346 mthread->key.nearMisses++;
998a4334 3347 }
6da3b3ad
PD
3348
3349 // be a bit paranoid here since we're weakening our matching
3ddb9247 3350 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3351 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3352 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3353 pident.domain = mthread->key.domain;
3354 pident.type = mthread->key.type;
3355 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3356 }
09e6702a 3357 }
6da3b3ad
PD
3358 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3359 if(g_logCommonErrors) {
e6a9dde5 3360 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3361 }
09e6702a 3362 }
6da3b3ad 3363 else if(fd >= 0) {
9ec48f21 3364 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
6da3b3ad
PD
3365 t_udpclientsocks->returnSocket(fd);
3366 }
09e6702a
BH
3367}
3368
1f4abb20
BH
3369FDMultiplexer* getMultiplexer()
3370{
3371 FDMultiplexer* ret;
f26bf547 3372 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3373 try {
f26bf547 3374 ret=i.second();
1f4abb20
BH
3375 return ret;
3376 }
98d0ee4a 3377 catch(FDMultiplexerException &fe) {
e6a9dde5 3378 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3379 }
3380 catch(...) {
e6a9dde5 3381 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3382 }
1f4abb20 3383 }
e6a9dde5 3384 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3385 exit(1);
3386}
3387
3ddb9247 3388
d187038c 3389static string* doReloadLuaScript()
4485aa35 3390{
674cf0f6 3391 string fname= ::arg()["lua-dns-script"];
4485aa35 3392 try {
674cf0f6 3393 if(fname.empty()) {
f26bf547 3394 t_pdl.reset();
377602e3 3395 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3396 return new string("unloaded\n");
4485aa35
BH
3397 }
3398 else {
9694e14f
AT
3399 t_pdl = std::make_shared<RecursorLua4>();
3400 t_pdl->loadFile(fname);
4485aa35
BH
3401 }
3402 }
fdbf35ac 3403 catch(std::exception& e) {
e6a9dde5 3404 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3405 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3406 }
3ddb9247 3407
e6a9dde5 3408 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3409 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3410}
3411
49a699c4
BH
3412string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3413{
3ddb9247 3414 if(begin != end)
49a699c4 3415 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3416
0f39c1a3 3417 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3418}
49a699c4 3419
d187038c 3420static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3421try
3422{
3423 if(newRegex.empty()) {
f26bf547 3424 t_traceRegex.reset();
77499b05
BH
3425 return new string("unset\n");
3426 }
3427 else {
f26bf547 3428 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3429 return new string("ok\n");
3430 }
3431}
3f81d239 3432catch(PDNSException& ae)
77499b05
BH
3433{
3434 return new string(ae.reason+"\n");
3435}
3436
3437string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3438{
3439 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3440}
3441
4e9a20e6 3442static void checkLinuxIPv6Limits()
3443{
3444#ifdef __linux__
3445 string line;
3446 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3447 int lim=std::stoi(line);
4e9a20e6 3448 if(lim < 16384) {
e6a9dde5 3449 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3450 }
3451 }
3452#endif
3453}
36849ff2 3454static void checkOrFixFDS()
4e9a20e6 3455{
c0063e60 3456 unsigned int availFDs=getFilenumLimit();
3457 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3458
3459 if(wantFDs > availFDs) {
067ad20e 3460 unsigned int hardlimit= getFilenumLimit(true);
3461 if(hardlimit >= wantFDs) {
c0063e60 3462 setFilenumLimit(wantFDs);
e6a9dde5 3463 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3464 }
3465 else {
067ad20e 3466 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3467 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3468 g_maxMThreads = newval;
067ad20e 3469 setFilenumLimit(hardlimit);
36849ff2 3470 }
3471 }
4e9a20e6 3472}
77499b05 3473
c390b2da 3474static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3475
f26bf547 3476static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3477{
3478 t_allowFrom = ng;
f26bf547 3479 return nullptr;
49a699c4
BH
3480}
3481
dbd23fc2
BH
3482int g_argc;
3483char** g_argv;
3484
18af64a8 3485void parseACLs()
f7c1d4e3 3486{
18af64a8 3487 static bool l_initialized;
3ddb9247 3488
49a699c4 3489 if(l_initialized) { // only reload configuration file on second call
18af64a8 3490 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3491 if(::arg()["config-name"]!="") {
3492 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3493 }
18af64a8 3494 cleanSlashes(configname);
3ddb9247
PD
3495
3496 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3497 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3498 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3499 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3500 ::arg().preParse(g_argc, g_argv, "include-dir");
3501
3502 // then process includes
3503 std::vector<std::string> extraConfigs;
242b90e1
AT
3504 ::arg().gatherIncludes(extraConfigs);
3505
1dc8f4d0 3506 for(const std::string& fn : extraConfigs) {
7e818521 3507 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3508 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3509 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3510 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3511 }
ca2c884c
AT
3512
3513 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3514 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3515 }
49a699c4 3516
f26bf547
RG
3517 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3518 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3519
2c95fc65
BH
3520 if(!::arg()["allow-from-file"].empty()) {
3521 string line;
2c95fc65
BH
3522 ifstream ifs(::arg()["allow-from-file"].c_str());
3523 if(!ifs) {
9c61b9d0 3524 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3525 }
3526
3527 string::size_type pos;
3528 while(getline(ifs,line)) {
3529 pos=line.find('#');
3530 if(pos!=string::npos)
3531 line.resize(pos);
3532 trim(line);
3533 if(line.empty())
3534 continue;
3535
18af64a8 3536 allowFrom->addMask(line);
2c95fc65 3537 }
e6a9dde5 3538 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3539 }
3540 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3541 vector<string> ips;
3542 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3543
e6a9dde5 3544 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3545 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3546 allowFrom->addMask(*i);
f7c1d4e3 3547 if(i!=ips.begin())
e6a9dde5
PL
3548 g_log<<Logger::Warning<<", ";
3549 g_log<<Logger::Warning<<*i;
f7c1d4e3 3550 }
e6a9dde5 3551 g_log<<Logger::Warning<<endl;
f7c1d4e3 3552 }
49a699c4 3553 else {
3ddb9247 3554 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3555 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3556 allowFrom = nullptr;
49a699c4 3557 }
3ddb9247 3558
49a699c4 3559 g_initialAllowFrom = allowFrom;
d7dae798 3560 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3561 oldAllowFrom = nullptr;
3ddb9247 3562
49a699c4 3563 l_initialized = true;
18af64a8
BH
3564}
3565
795215f2 3566
756e82cf 3567static void setupDelegationOnly()
3568{
3569 vector<string> parts;
3570 stringtok(parts, ::arg()["delegation-only"], ", \t");
3571 for(const auto& p : parts) {
9065eb05 3572 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3573 }
3574}
795215f2 3575
8fd25133
RG
3576static std::map<unsigned int, std::set<int> > parseCPUMap()
3577{
3578 std::map<unsigned int, std::set<int> > result;
3579
3580 const std::string value = ::arg()["cpu-map"];
3581
3582 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3583 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3584 return result;
3585 }
3586
3587 std::vector<std::string> parts;
3588
3589 stringtok(parts, value, " \t");
3590
3591 for(const auto& part : parts) {
3592 if (part.find('=') == string::npos)
3593 continue;
3594
3595 try {
3596 auto headers = splitField(part, '=');
3597 trim(headers.first);
3598 trim(headers.second);
3599
3600 unsigned int threadId = pdns_stou(headers.first);
3601 std::vector<std::string> cpus;
3602
3603 stringtok(cpus, headers.second, ",");
3604
3605 for(const auto& cpu : cpus) {
3606 int cpuId = std::stoi(cpu);
3607
3608 result[threadId].insert(cpuId);
3609 }
3610 }
3611 catch(const std::exception& e) {
e6a9dde5 3612 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3613 }
3614 }
3615
3616 return result;
3617}
3618
3619static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3620{
3621 const auto& cpuMapping = cpusMap.find(n);
3622 if (cpuMapping != cpusMap.cend()) {
3623 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3624 if (rc == 0) {
e6a9dde5 3625 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3626 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3627 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3628 }
e6a9dde5 3629 g_log<<Logger::Info<<endl;
8fd25133
RG
3630 }
3631 else {
e6a9dde5 3632 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3633 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3634 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3635 }
e6a9dde5 3636 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3637 }
3638 }
3639}
3640
af1377b7
NC
3641#ifdef NOD_ENABLED
3642static void setupNODThread()
3643{
3644 if (g_nodEnabled) {
b78727c6
NC
3645 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3646 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3647 try {
3648 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3649 }
3650 catch (const PDNSException& e) {
3651 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3652 _exit(1);
3653 }
3654 if (!t_nodDBp->init()) {
3655 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3656 _exit(1);
3657 }
41c542ec 3658 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 3659 t.detach();
ca2526f5 3660 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
3661 }
3662 if (g_udrEnabled) {
b78727c6
NC
3663 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3664 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3665 try {
3666 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3667 }
3668 catch (const PDNSException& e) {
3669 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3670 _exit(1);
3671 }
3672 if (!t_udrDBp->init()) {
3673 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3674 _exit(1);
3675 }
3676 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 3677 t.detach();
ca2526f5 3678 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
3679 }
3680}
3681
3682void parseNODWhitelist(const std::string& wlist)
3683{
3684 vector<string> parts;
3685 stringtok(parts, wlist, ",; ");
3686 for(const auto& a : parts) {
3687 g_nodDomainWL.add(DNSName(a));
3688 }
3689}
3690
3691static void setupNODGlobal()
3692{
3693 // Setup NOD subsystem
3694 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3695 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3696 g_nodLog = ::arg().mustDo("new-domain-log");
3697 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3698
3699 // Setup Unique DNS Response subsystem
3700 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3701 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3702}
3703#endif /* NOD_ENABLED */
3704
d187038c 3705static int serviceMain(int argc, char*argv[])
18af64a8 3706{
e6a9dde5
PL
3707 g_log.setName(s_programname);
3708 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3709 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3710
3711 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3712 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3713 if(val >= 0)
e6a9dde5 3714 g_log.setFacility(val);
18af64a8 3715 else
e6a9dde5 3716 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3717 }
3718
ba1a571d 3719 showProductVersion();
3afde9b2 3720
06ea9015 3721 g_disthashseed=dns_random(0xffffffff);
3722
b7ef5828
PL
3723 checkLinuxIPv6Limits();
3724 try {
3725 vector<string> addrs;
3726 if(!::arg()["query-local-address6"].empty()) {
3727 SyncRes::s_doIPv6=true;
e6a9dde5 3728 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3729
3730 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3731 for(const string& addr : addrs) {
3732 g_localQueryAddresses6.push_back(ComboAddress(addr));
3733 }
3734 }
3735 else {
e6a9dde5 3736 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3737 }
3738 addrs.clear();
3739 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3740 for(const string& addr : addrs) {
3741 g_localQueryAddresses4.push_back(ComboAddress(addr));
3742 }
3743 }
3744 catch(std::exception& e) {
e6a9dde5 3745 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3746 exit(99);
3747 }
3748
e48c6b8a
PL
3749 // keep this ABOVE loadRecursorLuaConfig!
3750 if(::arg()["dnssec"]=="off")
3751 g_dnssecmode=DNSSECMode::Off;
3752 else if(::arg()["dnssec"]=="process-no-validate")
3753 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3754 else if(::arg()["dnssec"]=="process")
3755 g_dnssecmode=DNSSECMode::Process;
3756 else if(::arg()["dnssec"]=="validate")
3757 g_dnssecmode=DNSSECMode::ValidateAll;
3758 else if(::arg()["dnssec"]=="log-fail")
3759 g_dnssecmode=DNSSECMode::ValidateForLog;
3760 else {
e6a9dde5 3761 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3762 exit(1);
3763 }
3764
9a3ab3e4
KM
3765 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3766 if (g_signatureInceptionSkew < 0) {
3767 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3768 exit(1);
3769 }
3770
e48c6b8a 3771 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3772 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3773
a6f7f5fe 3774 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3775 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3776
3777 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3778 try {
e6ec15bf 3779 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3780 }
3781 catch (PDNSException &e) {
e6a9dde5 3782 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3783 exit(1);
3784 }
ad42489c 3785
18af64a8 3786 parseACLs();
d6f3fcfa 3787 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 3788
eb5bae86 3789 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3790 vector<string> ips;
3791 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3792 ips.push_back("0.0.0.0");
3793 ips.push_back("::");
c36bc97a 3794
e6a9dde5 3795 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3796 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3797 SyncRes::addDontQuery(*i);
eb5bae86 3798 if(i!=ips.begin())
e6a9dde5
PL
3799 g_log<<Logger::Warning<<", ";
3800 g_log<<Logger::Warning<<*i;
eb5bae86 3801 }
e6a9dde5 3802 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3803 }
3804
f7c1d4e3 3805 g_quiet=::arg().mustDo("quiet");
3ddb9247 3806
b243ca3b 3807 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3808 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3809 if(g_weDistributeQueries) {
b243ca3b 3810 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3811 }
3ddb9247 3812
756e82cf 3813 setupDelegationOnly();
b33c2462 3814 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3815
77499b05
BH
3816 if(::arg()["trace"]=="fail") {
3817 SyncRes::setDefaultLogMode(SyncRes::Store);
3818 }
3819 else if(::arg().mustDo("trace")) {
3820 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3821 ::arg().set("quiet")="no";
3822 g_quiet=false;
3e9c6c0a 3823 g_dnssecLOG=true;
f7c1d4e3 3824 }
43a9b290
PL
3825 string myHostname = getHostname();
3826 if (myHostname == "UNKNOWN"){
3827 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3828 myHostname = "";
d0983bff 3829 }
3ddb9247 3830
aadceba8 3831 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
5cf4b2e7 3832 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
aadceba8 3833
1051f8a9
BH
3834 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3835
f7c1d4e3 3836 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
b9473937 3837 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
63637fd8 3838 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3839 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3840 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3841 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3842 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3843 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3844 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3845 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3846 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3847 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3848 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3849 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3850 if(SyncRes::s_serverID.empty()) {
d0983bff 3851 SyncRes::s_serverID = myHostname;
f7c1d4e3 3852 }
3ddb9247 3853
e9f9b8ec
RG
3854 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3855 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
c9783016 3856 SyncRes::clearECSStats();
fd8898fb 3857 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3858 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
ed9019c9 3859 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
e9f9b8ec 3860
116d1288
OM
3861 SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
3862
8a3a3822
RG
3863 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3864 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3865 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3866 }
3867 else {
3868 bool found = false;
3869 for (const auto& addr : g_localQueryAddresses4) {
3870 if (!IsAnyAddress(addr)) {
3871 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3872 found = true;
3873 break;
3874 }
3875 }
3876 if (!found) {
3877 for (const auto& addr : g_localQueryAddresses6) {
3878 if (!IsAnyAddress(addr)) {
3879 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3880 found = true;
3881 break;
3882 }
3883 }
3884 if (!found) {
3885 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3886 }
3887 }
3888 }
3889
2fe3354d
CH
3890 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3891 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3892 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3893
5cc8371b 3894 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3895 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3896
5b0ddd18 3897 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3898
49a699c4 3899 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3900
08f3f638 3901 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3902
f7c1d4e3 3903 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3904 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3905
3906 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3907 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3908
b3adda56
PD
3909 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3910
b243ca3b 3911 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3912 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3913 if (g_numWorkerThreads < 1) {
e6a9dde5 3914 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3915 g_numWorkerThreads = 1;
3916 }
3917
b243ca3b 3918 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3919 g_maxMThreads = ::arg().asNum("max-mthreads");
3920
00b8cadc
RG
3921 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3922
0ec489bf 3923 g_statisticsInterval = ::arg().asNum("statistics-interval");
3924
559b6c93
PL
3925 {
3926 SuffixMatchNode dontThrottleNames;
3927 vector<string> parts;
3928 stringtok(parts, ::arg()["dont-throttle-names"]);
3929 for (const auto &p : parts) {
3930 dontThrottleNames.add(DNSName(p));
3931 }
3932 g_dontThrottleNames.setState(dontThrottleNames);
3933
3934 NetmaskGroup dontThrottleNetmasks;
3935 stringtok(parts, ::arg()["dont-throttle-netmasks"]);
3936 for (const auto &p : parts) {
3937 dontThrottleNetmasks.addMask(Netmask(p));
3938 }
3939 g_dontThrottleNetmasks.setState(dontThrottleNetmasks);
3940 }
3941
144040be 3942 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
078be17f
RG
3943 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
3944 s_balancingFactor = 0.0;
3945 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
3946 }
144040be 3947
810ff705
RG
3948#ifdef SO_REUSEPORT
3949 g_reusePort = ::arg().mustDo("reuseport");
3950#endif
3951
b243ca3b 3952 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3953
b243ca3b
RG
3954 if (g_reusePort) {
3955 if (g_weDistributeQueries) {
3956 /* first thread is the handler, then distributors */
3957 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3958 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3959 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3960 makeUDPServerSockets(deferredAdds);
adb6cd72 3961 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3962 }
3963 }
3964 else {
3965 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3966 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3967 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3968 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3969 makeUDPServerSockets(deferredAdds);
adb6cd72 3970 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3971 }
810ff705
RG
3972 }
3973 }
3974 else {
c47f201b 3975 std::set<int> tcpSockets;
b243ca3b
RG
3976 /* we don't have reuseport so we can only open one socket per
3977 listening addr:port and everyone will listen on it */
3978 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3979 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3980
3981 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3982 needs to listen to the shared sockets */
3983 if (g_weDistributeQueries) {
3984 /* first thread is the handler, then distributors */
3985 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3986 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3987 }
3988 }
3989 else {
3990 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3991 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3992 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3993 }
3994 }
810ff705 3995 }
815099b2 3996
af1377b7
NC
3997#ifdef NOD_ENABLED
3998 // Setup newly observed domain globals
3999 setupNODGlobal();
4000#endif /* NOD_ENABLED */
4001
677e2a46
BH
4002 int forks;
4003 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
4004 if(!fork()) // we are child
4005 break;
4006 }
3ddb9247 4007
f7c1d4e3 4008 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
4009 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
4010 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
4011 daemonize();
4012 }
4013 signal(SIGUSR1,usr1Handler);
4014 signal(SIGUSR2,usr2Handler);
4015 signal(SIGPIPE,SIG_IGN);
810ff705 4016
a6414fdc 4017 checkOrFixFDS();
3ddb9247 4018
d1b28475
KM
4019#ifdef HAVE_LIBSODIUM
4020 if (sodium_init() == -1) {
e6a9dde5 4021 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
4022 exit(99);
4023 }
4024#endif
4025
3afde9b2
PL
4026 openssl_thread_setup();
4027 openssl_seed();
e97cb679
AT
4028 /* setup rng before chroot */
4029 dns_random_init();
3afde9b2 4030
bdbb07e0 4031 if(::arg()["server-id"].empty()) {
d0983bff 4032 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
4033 }
4034
138435cb
BH
4035 int newgid=0;
4036 if(!::arg()["setgid"].empty())
2211dac9 4037 newgid = strToGID(::arg()["setgid"]);
138435cb
BH
4038 int newuid=0;
4039 if(!::arg()["setuid"].empty())
2211dac9 4040 newuid = strToUID(::arg()["setuid"]);
138435cb 4041
f1d6a7ce
KM
4042 Utility::dropGroupPrivs(newuid, newgid);
4043
138435cb 4044 if (!::arg()["chroot"].empty()) {
75336810
PL
4045#ifdef HAVE_SYSTEMD
4046 char *ns;
4047 ns = getenv("NOTIFY_SOCKET");
4048 if (ns != nullptr) {
e6a9dde5 4049 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
4050 exit(1);
4051 }
4052#endif
138435cb 4053 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 4054 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
4055 exit(1);
4056 }
f0f3f0b0 4057 else
377602e3 4058 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
4059 }
4060
f0f3f0b0
PL
4061 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4062 if(!s_pidfname.empty())
4063 unlink(s_pidfname.c_str()); // remove possible old pid file
4064 writePid();
4065
4066 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4067
f1d6a7ce 4068 Utility::dropUserPrivs(newuid);
1f2b341e
RG
4069 try {
4070 /* we might still have capabilities remaining, for example if we have been started as root
4071 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4072 like CAP_NET_BIND_SERVICE.
4073 */
4074 dropCapabilities();
4075 }
4076 catch(const std::exception& e) {
4077 g_log<<Logger::Warning<<e.what()<<endl;
4078 }
c0063e60 4079
e6ec15bf
RG
4080 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4081
49a699c4 4082 makeThreadPipes();
3ddb9247 4083
5d4dd7fe
BH
4084 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4085 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 4086 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 4087 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 4088
c29d820c
RG
4089 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4090
563517f3
RG
4091 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4092 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4093 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4094 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
72259676 4095
d705aad9
RG
4096 if (::arg().mustDo("snmp-agent")) {
4097 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4098 g_snmpAgent->run();
4099 }
4100
b47026fd 4101 int port = ::arg().asNum("udp-source-port-min");
58da9034 4102 if(port < 1024 || port > 65535){
e6a9dde5 4103 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
4104 exit(99); // this isn't going to fix itself either
4105 }
4106 s_minUdpSourcePort = port;
b47026fd 4107 port = ::arg().asNum("udp-source-port-max");
58da9034 4108 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 4109 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
4110 exit(99); // this isn't going to fix itself either
4111 }
4112 s_maxUdpSourcePort = port;
4113 std::vector<string> parts {};
b47026fd 4114 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
4115 for (const auto &part : parts)
4116 {
4117 port = std::stoi(part);
58da9034 4118 if(port < 1024 || port > 65535){
e6a9dde5 4119 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
4120 exit(99); // this isn't going to fix itself either
4121 }
4122 s_avoidUdpSourcePorts.insert(port);
4123 }
4124
b243ca3b 4125 unsigned int currentThreadId = 1;
8fd25133 4126 const auto cpusMap = parseCPUMap();
d77abca1 4127
c3828c03 4128 if(g_numThreads == 1) {
e6a9dde5 4129 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
4130#ifdef HAVE_SYSTEMD
4131 sd_notify(0, "READY=1");
4132#endif
b243ca3b
RG
4133
4134 /* This thread handles the web server, carbon, statistics and the control channel */
4135 auto& handlerInfos = s_threadInfos.at(0);
4136 handlerInfos.isHandler = true;
c390b2da 4137 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
4138
4139 setCPUMap(cpusMap, currentThreadId, pthread_self());
4140
4141 auto& infos = s_threadInfos.at(currentThreadId);
4142 infos.isListener = true;
4143 infos.isWorker = true;
c390b2da 4144 recursorThread(currentThreadId++, "worker");
76698c6e
BH
4145 }
4146 else {
8fd25133 4147
b243ca3b
RG
4148 if (g_weDistributeQueries) {
4149 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4150 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4151 auto& infos = s_threadInfos.at(currentThreadId);
4152 infos.isListener = true;
c390b2da 4153 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
4154
4155 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4156 }
4157 }
8fd25133 4158
62b549e0
RG
4159 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4160
b243ca3b
RG
4161 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4162 auto& infos = s_threadInfos.at(currentThreadId);
4163 infos.isListener = g_weDistributeQueries ? false : true;
4164 infos.isWorker = true;
c390b2da 4165 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b
RG
4166
4167 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 4168 }
b243ca3b 4169
6b6720de
PL
4170#ifdef HAVE_SYSTEMD
4171 sd_notify(0, "READY=1");
4172#endif
b243ca3b
RG
4173
4174 /* This thread handles the web server, carbon, statistics and the control channel */
4175 auto& infos = s_threadInfos.at(0);
4176 infos.isHandler = true;
c390b2da 4177 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b
RG
4178
4179 s_threadInfos.at(0).thread.join();
bb4bdbaf 4180 }
bb4bdbaf
BH
4181 return 0;
4182}
4183
c390b2da 4184static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
4185try
4186{
d77abca1 4187 t_id=n;
b243ca3b 4188 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
4189
4190 static string threadPrefix = "pdns-r/";
519f5484 4191 setThreadName(threadPrefix + threadName);
c390b2da 4192
49a699c4 4193 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 4194 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 4195 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
4196 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4197 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 4198 primeHints();
3ddb9247 4199
f26bf547 4200 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 4201
e6a9dde5 4202 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 4203
af1377b7 4204#ifdef NOD_ENABLED
41c542ec
NC
4205 if (threadInfo.isWorker)
4206 setupNODThread();
af1377b7 4207#endif /* NOD_ENABLED */
c1751a59
RG
4208
4209 /* the listener threads handle TCP queries */
4210 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
4211 try {
4212 if(!::arg()["lua-dns-script"].empty()) {
4213 t_pdl = std::make_shared<RecursorLua4>();
4214 t_pdl->loadFile(::arg()["lua-dns-script"]);
4215 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4216 }
4217 }
4218 catch(std::exception &e) {
4219 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4220 _exit(99);
674cf0f6 4221 }
674cf0f6 4222 }
3ddb9247 4223
f8f243b0 4224 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 4225 if(ringsize) {
f26bf547 4226 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
4227 if(g_weDistributeQueries)
4228 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 4229 else
3ddb9247 4230 t_remotes->set_capacity(ringsize);
f26bf547 4231 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4232 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
4233 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4234 t_bogusremotes->set_capacity(ringsize);
f26bf547 4235 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4236 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 4237 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4238 t_timeouts->set_capacity(ringsize);
92011b8f 4239
f26bf547 4240 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4241 t_queryring->set_capacity(ringsize);
f26bf547 4242 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4243 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
4244 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4245 t_bogusqueryring->set_capacity(ringsize);
92011b8f 4246 }
3ddb9247 4247
f26bf547 4248 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
144040be 4249 threadInfo.mt = MT.get();
3ddb9247 4250
63341e8d
RG
4251#ifdef HAVE_PROTOBUF
4252 /* start protobuf export threads if needed */
4253 auto luaconfsLocal = g_luaconfs.getLocal();
4254 checkProtobufExport(luaconfsLocal);
4255 checkOutgoingProtobufExport(luaconfsLocal);
4256#endif /* HAVE_PROTOBUF */
b9fa43e0
OM
4257#ifdef HAVE_FSTRM
4258 checkFrameStreamExport(luaconfsLocal);
4259#endif
63341e8d 4260
bb4bdbaf
BH
4261 PacketID pident;
4262
4263 t_fdm=getMultiplexer();
d77abca1 4264
b243ca3b 4265 if(threadInfo.isHandler) {
d07bf7ff 4266 if(::arg().mustDo("webserver")) {
e6a9dde5 4267 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 4268 try {
1ce57618 4269 new RecursorWebServer(t_fdm);
8989097d
CH
4270 }
4271 catch(PDNSException &e) {
e6a9dde5 4272 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
4273 exit(99);
4274 }
f3d1d67b 4275 }
377602e3 4276 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 4277 }
810ff705 4278 else {
d77abca1 4279
b243ca3b
RG
4280 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4281 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4282
4283 if (threadInfo.isListener) {
4284 if (g_reusePort) {
4285 /* then every listener has its own FDs */
4286 for(const auto deferred : threadInfo.deferredAdds) {
4287 t_fdm->addReadFD(deferred.first, deferred.second);
4288 }
810ff705 4289 }
b243ca3b
RG
4290 else {
4291 /* otherwise all listeners are listening on the same ones */
4292 for(const auto deferred : g_deferredAdds) {
4293 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4294 }
4295 }
4296 }
810ff705 4297 }
3ddb9247 4298
b0b37121 4299 registerAllStats();
d77abca1 4300
b243ca3b 4301 if(threadInfo.isHandler) {
674cf0f6
BH
4302 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4303 }
1bc3c142 4304
f7c1d4e3 4305 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4306
f7c1d4e3 4307 bool listenOnTCP(true);
49a699c4 4308
cb1523d1 4309 time_t last_stat = 0;
a2f87dd1 4310 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4311 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4312 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 4313 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 4314 for(;;) {
ac0e821b 4315 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4316
3427fa8a
BH
4317 if(!(counter%500)) {
4318 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4319 }
4320
d2392145 4321 if(!(counter%55)) {
d8f6d49f 4322 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4323 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4324
f7c1d4e3 4325 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 4326 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4327 if(g_logCommonErrors)
e6a9dde5 4328 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4329 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4330 }
4331 }
3ddb9247 4332
f7c1d4e3
BH
4333 counter++;
4334
b243ca3b 4335 if(threadInfo.isHandler) {
cb1523d1
RG
4336 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4337 doStats();
4338 last_stat = g_now.tv_sec;
4339 }
f7c1d4e3 4340
cb1523d1 4341 Utility::gettimeofday(&g_now, 0);
2c78bd57 4342
cb1523d1
RG
4343 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4344 MT->makeThread(doCarbonDump, 0);
4345 last_carbon = g_now.tv_sec;
4346 }
2c78bd57 4347 }
2a0276a9 4348 if (t_pdl != nullptr) {
9adbe790 4349 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
4350 /* remember that the listener threads handle TCP queries */
4351 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
4352 // Only on threads processing queries
4353 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4354 t_pdl->maintenance();
4355 last_lua_maintenance = g_now.tv_sec;
4356 }
9adbe790 4357 }
a2f87dd1 4358 }
2c78bd57 4359
bb4bdbaf 4360 t_fdm->run(&g_now);
3ea54bf0 4361 // 'run' updates g_now for us
f7c1d4e3 4362
b243ca3b 4363 if(threadInfo.isListener) {
5c889cf5 4364 if(listenOnTCP) {
c47f201b
RG
4365 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4366 for(const auto fd : threadInfo.tcpSockets) {
4367 t_fdm->removeReadFD(fd);
b243ca3b 4368 }
c47f201b
RG
4369 listenOnTCP=false;
4370 }
f7c1d4e3 4371 }
5c889cf5 4372 else {
c47f201b
RG
4373 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4374 for(const auto fd : threadInfo.tcpSockets) {
4375 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4376 }
c47f201b
RG
4377 listenOnTCP=true;
4378 }
f7c1d4e3
BH
4379 }
4380 }
4381 }
4382}
3f81d239 4383catch(PDNSException &ae) {
e6a9dde5 4384 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4385 return 0;
4386}
4387catch(std::exception &e) {
e6a9dde5 4388 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4389 return 0;
4390}
4391catch(...) {
e6a9dde5 4392 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4393 return 0;
4394}
4395
51e2144e 4396
3ddb9247 4397int main(int argc, char **argv)
288f4aa9 4398{
dbd23fc2
BH
4399 g_argc = argc;
4400 g_argv = argv;
5e3de507 4401 g_stats.startupTime=time(0);
b51ef4f9 4402 Utility::srandom();
3e135495 4403 versionSetProduct(ProductRecursor);
8a63d3ce 4404 reportBasicTypes();
0007c2e5 4405 reportOtherTypes();
ea634573 4406
22030c37 4407 int ret = EXIT_SUCCESS;
caa6eefa 4408
288f4aa9 4409 try {
f888311c 4410 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4411 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4412 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4413 ::arg().set("local-port","port to listen on")="53";
32252594 4414 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4415 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4416 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4417 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4418 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4419 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4420 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4421 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4422 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4423 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4424 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4425 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19 4426 ::arg().set("chroot","switch to chroot jail")="";
fe9e7b8d
PL
4427 ::arg().set("setgid","If set, change group id to this gid for more security"
4428#ifdef HAVE_SYSTEMD
4429#define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
4430 SYSTEMD_SETID_MSG
4431#endif
4432 )="";
4433 ::arg().set("setuid","If set, change user id to this uid for more security"
4434#ifdef HAVE_SYSTEMD
4435 SYSTEMD_SETID_MSG
4436#endif
4437 )="";
c83ee49d 4438 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4439 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4440 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4441 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4442 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4443 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4444 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4445 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4446 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4447 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4448 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4449 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
8ca656a8 4450 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
cc08b5a9 4451 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4452 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4453 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4454 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4455 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4456
0ec489bf 4457 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4458 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4459 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4460 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4461 ::arg().set("socket-owner","Owner of socket")="";
4462 ::arg().set("socket-group","Group of socket")="";
4463 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4464
0524add9 4465 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+"/pdns-recursor when unset and not chrooted" )="";
2e3d8a19
BH
4466 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4467 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4468 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4469 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4470 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4471 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 4472 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4473 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
559b6c93
PL
4474 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4475 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
2e3d8a19 4476 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4477 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4478 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
b9473937 4479 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
c3e753c7 4480 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4481 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4482 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4483 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4484 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4485 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4486 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4487 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4488 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4489 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4490 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4491 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4492 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4493 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4494 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4495 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4496 ::arg().set("lua-config-file", "More powerful configuration options")="";
0273d4ab 4497 ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
644dd1da 4498
5605c067 4499 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4500 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4501 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4502 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4503 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4504 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4505 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4506 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4507 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4508 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4509 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4510 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
fd8898fb 4511 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
35695d18 4512 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
fd8898fb 4513 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5cf4b2e7 4514 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
ed9019c9 4515 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
3f975863 4516 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4517 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4518 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4519 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4520 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4521 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4522 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4523 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4524 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
4525 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4526 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 4527 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4528 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4529 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4530 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4531 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
c29d820c 4532 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
ee271fc4 4533 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
a09a8ce0 4534
68e6df3c 4535 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4536 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4537
4538 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4539
d705aad9 4540 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4541 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4542
72259676
RG
4543 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4544 for (size_t idx = 0; idx < 32; idx++) {
4545 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4546 }
4547 for (size_t idx = 0; idx < 128; idx++) {
4548 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4549 }
563517f3
RG
4550 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4551 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4552 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4553 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
d705aad9 4554
0735b17e 4555 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4556 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4557
8fd25133
RG
4558 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4559
98d36505
RG
4560 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4561
5cc8371b 4562 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4563 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4564
58da9034 4565 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4566 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4567 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4568 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 4569 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
144040be 4570 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
116d1288 4571 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="no";
af1377b7
NC
4572#ifdef NOD_ENABLED
4573 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4574 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4575 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4576 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4577 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4578 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4579 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
4580 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4581 ::arg().set("unique-response-log", "Log unique responses")="yes";
4582 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4583 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4584 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 4585#endif /* NOD_ENABLED */
2e3d8a19 4586 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4587 ::arg().setCmd("version","Print version string");
d5141417 4588 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 4589 g_log.toConsole(Logger::Info);
2e3d8a19 4590 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4591
2d733c0f
CH
4592 string configname=::arg()["config-dir"]+"/recursor.conf";
4593 if(::arg()["config-name"]!="") {
4594 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4595 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4596 }
4597 cleanSlashes(configname);
5124de27 4598
5cc1ea1d
CH
4599 if(!::arg().getCommands().empty()) {
4600 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4601 exit(99);
4602 }
4603
577cf284
BH
4604 if(::arg().mustDo("config")) {
4605 cout<<::arg().configstring()<<endl;
4606 exit(0);
4607 }
4608
3ddb9247 4609 if(!::arg().file(configname.c_str()))
e6a9dde5 4610 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4611
2e3d8a19 4612 ::arg().parse(argc,argv);
c836dc19 4613
2054afbb
CH
4614 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4615 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4616 exit(EXIT_FAILURE);
4617 }
4618
4619 if (::arg()["socket-dir"].empty()) {
4620 if (::arg()["chroot"].empty())
0524add9 4621 ::arg().set("socket-dir") = std::string(LOCALSTATEDIR) + "/pdns-recursor";
f0f3f0b0
PL
4622 else
4623 ::arg().set("socket-dir") = "/";
4624 }
4625
2e3d8a19 4626 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4627
b243ca3b
RG
4628 if(::arg().asNum("threads")==1) {
4629 if (::arg().mustDo("pdns-distributes-queries")) {
4630 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4631 ::arg().set("pdns-distributes-queries")="no";
4632 }
4633 }
4634
4635 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4636 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4637 ::arg().set("distributor-threads")="1";
4638 }
4639
4640 if (!::arg().mustDo("pdns-distributes-queries")) {
4641 ::arg().set("distributor-threads")="0";
4642 }
61d74169 4643
2e3d8a19 4644 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4645 cout<<"syntax:"<<endl<<endl;
4646 cout<<::arg().helpstring(::arg()["help"])<<endl;
4647 exit(0);
b636533b 4648 }
5e3de507 4649 if(::arg().mustDo("version")) {
ba1a571d 4650 showProductVersion();
3613a51c 4651 showBuildConfiguration();
67076869 4652 exit(0);
5e3de507 4653 }
b636533b 4654
34162f8f 4655 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4656
34162f8f
CH
4657 if (logUrgency < Logger::Error)
4658 logUrgency = Logger::Error;
f48d7b65 4659 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4660 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4661 }
e6a9dde5
PL
4662 g_log.setLoglevel(logUrgency);
4663 g_log.toConsole(logUrgency);
34162f8f 4664
f7c1d4e3 4665 serviceMain(argc, argv);
288f4aa9 4666 }
3f81d239 4667 catch(PDNSException &ae) {
e6a9dde5 4668 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4669 ret=EXIT_FAILURE;
288f4aa9 4670 }
fdbf35ac 4671 catch(std::exception &e) {
e6a9dde5 4672 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4673 ret=EXIT_FAILURE;
288f4aa9
BH
4674 }
4675 catch(...) {
e6a9dde5 4676 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4677 ret=EXIT_FAILURE;
288f4aa9 4678 }
3ddb9247 4679
22030c37 4680 return ret;
288f4aa9 4681}