]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
auth docs: talk about glibc & MALLOC_ARENA_MAX
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
d6f3fcfa 91#include "pubsuffix.hh"
af1377b7
NC
92#ifdef NOD_ENABLED
93#include "nod.hh"
94#endif /* NOD_ENABLED */
f3c18728 95
d9d3f9c1 96#include "rec-protobuf.hh"
d705aad9 97#include "rec-snmp.hh"
aa7929a3 98
6b6720de
PL
99#ifdef HAVE_SYSTEMD
100#include <systemd/sd-daemon.h>
101#endif
102
d187038c
RG
103#include "namespaces.hh"
104
d61aa945
RG
105#ifdef HAVE_PROTOBUF
106#include "uuid-utils.hh"
b9fa43e0 107#endif /* HAVE_PROTOBUF */
d61aa945 108
5cc8371b
RG
109#include "xpf.hh"
110
d187038c
RG
111typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
f26bf547 113static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 114static thread_local unsigned int t_id = 0;
f26bf547
RG
115static thread_local std::shared_ptr<Regex> t_traceRegex;
116static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 117#ifdef HAVE_PROTOBUF
3fe06137 118static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 119static thread_local uint64_t t_protobufServersGeneration;
3fe06137 120static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 121static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 122#endif /* HAVE_PROTOBUF */
f26bf547 123
b9fa43e0 124#ifdef HAVE_FSTRM
10ba6d01 125static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
b9fa43e0
OM
126static thread_local uint64_t t_frameStreamServersGeneration;
127#endif /* HAVE_FSTRM */
128
f26bf547 129thread_local std::unique_ptr<MT_t> MT; // the big MTasker
7ce9aad6 130std::unique_ptr<MemRecursorCache> s_RC;
a7956123
OM
131
132
f26bf547 133thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 134thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 135thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 136thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 137thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
138#ifdef NOD_ENABLED
139thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 140thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 141#endif /* NOD_ENABLED */
d187038c 142__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 143
b243ca3b
RG
144typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
145
d7dae798 146// for communicating with our threads
b243ca3b
RG
147// effectively readonly after startup
148struct RecThreadInfo
149{
150 struct ThreadPipeSet
151 {
152 int writeToThread{-1};
153 int readToThread{-1};
154 int writeFromThread{-1};
155 int readFromThread{-1};
156 int writeQueriesToThread{-1}; // this one is non-blocking
157 int readQueriesToThread{-1};
158 };
159
adb6cd72 160 /* FD corresponding to TCP sockets this thread is listening
c47f201b 161 on.
adb6cd72
RG
162 These FDs are also in deferredAdds when we have one
163 socket per listener, and in g_deferredAdds instead. */
164 std::set<int> tcpSockets;
b243ca3b
RG
165 /* FD corresponding to listening sockets if we have one socket per
166 listener (with reuseport), otherwise all listeners share the
167 same FD and g_deferredAdds is then used instead */
168 deferredAdd_t deferredAdds;
169 struct ThreadPipeSet pipes;
170 std::thread thread;
144040be
RG
171 MT_t* mt{nullptr};
172 uint64_t numberOfDistributedQueries{0};
b243ca3b
RG
173 /* handle the web server, carbon, statistics and the control channel */
174 bool isHandler{false};
175 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
176 bool isListener{false};
177 /* process queries */
178 bool isWorker{false};
49a699c4 179};
810ff705 180
b243ca3b
RG
181/* first we have the handler thread, t_id == 0 (some other
182 helper threads like SNMP might have t_id == 0 as well)
183 then the distributor threads if any
184 and finally the workers */
185static std::vector<RecThreadInfo> s_threadInfos;
186/* without reuseport, all listeners share the same sockets */
187static deferredAdd_t g_deferredAdds;
faf580f5 188
d187038c
RG
189typedef vector<int> tcpListenSockets_t;
190typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 191
d187038c 192static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 193static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
194static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
195static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
196static AtomicCounter counter;
9065eb05 197static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 198static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 199static NetmaskGroup g_XPFAcl;
d187038c 200static size_t g_tcpMaxQueriesPerConn;
a5886e6a 201static size_t s_maxUDPQueriesPerRound;
d187038c
RG
202static uint64_t g_latencyStatSize;
203static uint32_t g_disthashseed;
204static unsigned int g_maxTCPPerClient;
d187038c 205static unsigned int g_maxMThreads;
b243ca3b 206static unsigned int g_numDistributorThreads;
d187038c
RG
207static unsigned int g_numWorkerThreads;
208static int g_tcpTimeout;
209static uint16_t g_udpTruncationThreshold;
59cb4a79 210static uint16_t g_xpfRRCode{0};
d187038c
RG
211static std::atomic<bool> statsWanted;
212static std::atomic<bool> g_quiet;
213static bool g_logCommonErrors;
214static bool g_anyToTcp;
b243ca3b 215static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 216static bool g_reusePort{false};
00b8cadc 217static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 218static time_t g_statisticsInterval;
9065eb05 219static bool g_useIncomingECS;
c29d820c 220static bool g_useKernelTimestamp;
a6f7f5fe 221std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
222#ifdef NOD_ENABLED
223static bool g_nodEnabled;
224static DNSName g_nodLookupDomain;
225static bool g_nodLog;
226static SuffixMatchNode g_nodDomainWL;
ca2526f5 227static std::string g_nod_pbtag;
41c542ec
NC
228static bool g_udrEnabled;
229static bool g_udrLog;
ca2526f5 230static std::string g_udr_pbtag;
af1377b7 231#endif /* NOD_ENABLED */
f097141b 232#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 233static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
234#else
235static std::set<uint16_t> s_avoidUdpSourcePorts;
236#endif
bf6f28ca
CHB
237static uint16_t s_minUdpSourcePort;
238static uint16_t s_maxUdpSourcePort;
144040be 239static double s_balancingFactor;
49a699c4 240
b243ca3b 241RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 242RecursorStats g_stats;
2d733c0f 243string s_programname="pdns_recursor";
d187038c 244string s_pidfname;
c1c29961 245bool g_lowercaseOutgoing;
bf19ccfd 246unsigned int g_networkTimeoutMsec;
d187038c
RG
247unsigned int g_numThreads;
248uint16_t g_outgoingEDNSBufsize;
98d36505 249bool g_logRPZChanges{false};
c3828c03 250
559b6c93
PL
251// Used in the Syncres to not throttle certain servers
252GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
253GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
254
12cd44ee 255#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 256#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 257// Bad Nets taken from both:
3ddb9247 258// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 259// and
260// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
261// where such a network may not be considered a valid destination
262#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
263#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 264
d7dae798 265//! used to send information to a newborn mthread
ea634573 266struct DNSComboWriter {
08b02366 267 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
268 {
269 }
5cc8371b 270
37a919d4 271 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data, std::vector<DNSRecord>&& records): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_records(std::move(records)), d_data(std::move(data))
5164bac3
RG
272 {
273 }
274
5cc8371b
RG
275 void setRemote(const ComboAddress& sa)
276 {
277 d_remote=sa;
278 }
279
280 void setSource(const ComboAddress& sa)
ea634573 281 {
5cc8371b 282 d_source=sa;
ea634573
BH
283 }
284
b71b60ee 285 void setLocal(const ComboAddress& sa)
286 {
287 d_local=sa;
288 }
289
5cc8371b
RG
290 void setDestination(const ComboAddress& sa)
291 {
292 d_destination=sa;
293 }
b71b60ee 294
ea634573
BH
295 void setSocket(int sock)
296 {
297 d_socket=sock;
298 }
a1754c6a
BH
299
300 string getRemote() const
301 {
5cc8371b
RG
302 if (d_source == d_remote) {
303 return d_source.toStringWithPort();
304 }
305 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
306 }
307
5cc8371b 308 MOADNSParser d_mdp;
c9e9e5e0 309 struct timeval d_now;
5cc8371b
RG
310 /* Remote client, might differ from d_source
311 in case of XPF, in which case d_source holds
312 the IP of the client and d_remote of the proxy
313 */
314 ComboAddress d_remote;
315 ComboAddress d_source;
316 /* Destination address, might differ from
317 d_destination in case of XPF, in which case
318 d_destination holds the IP of the proxy and
319 d_local holds our own. */
320 ComboAddress d_local;
321 ComboAddress d_destination;
aa7929a3
RG
322#ifdef HAVE_PROTOBUF
323 boost::uuids::uuid d_uuid;
67e31ebe 324 string d_requestorId;
590388d2 325 string d_deviceId;
0a6a45c8 326 string d_deviceName;
c29d820c 327 struct timeval d_kernelTimestamp{0,0};
aa7929a3 328#endif
08b02366 329 std::string d_query;
5164bac3 330 std::vector<std::string> d_policyTags;
37a919d4 331 std::vector<DNSRecord> d_records;
5164bac3 332 LuaContext::LuaObject d_data;
b40562da 333 EDNSSubnetOpts d_ednssubnet;
5164bac3 334 shared_ptr<TCPConnection> d_tcpConnection;
37a919d4 335 boost::optional<int> d_rcode{boost::none};
e53b77e2 336 int d_socket{-1};
b673817a 337 unsigned int d_tag{0};
e9f63d47 338 uint32_t d_qhash{0};
70fb28d9 339 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
340 uint16_t d_ecsBegin{0};
341 uint16_t d_ecsEnd{0};
70fb28d9 342 bool d_variable{false};
5164bac3
RG
343 bool d_ecsFound{false};
344 bool d_ecsParsed{false};
37a919d4 345 bool d_followCNAMERecords{false};
406b722e 346 bool d_logResponse{false};
a73da04b 347 bool d_tcp{false};
ea634573
BH
348};
349
06857845
RG
350MT_t* getMT()
351{
352 return MT ? MT.get() : nullptr;
353}
ea634573 354
288f4aa9
BH
355ArgvMap &arg()
356{
357 static ArgvMap theArg;
358 return theArg;
359}
4ef015cd 360
8fb594ba 361unsigned int getRecursorThreadId()
b4015453 362{
30da2030 363 return t_id;
b4015453 364}
09e6702a 365
30ee601a
RG
366int getMTaskerTID()
367{
368 return MT->getTid();
369}
370
b243ca3b
RG
371static bool isDistributorThread()
372{
373 if (t_id == 0) {
374 return false;
375 }
376
377 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
378}
379
380static bool isHandlerThread()
381{
382 if (t_id == 0) {
383 return true;
384 }
385
386 return s_threadInfos.at(t_id).isHandler;
387}
388
d187038c 389static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 390
50c81227 391// -1 is error, 0 is timeout, 1 is success
3ddb9247 392int asendtcp(const string& data, Socket* sock)
5c633640
BH
393{
394 PacketID pident;
395 pident.sock=sock;
396 pident.outMSG=data;
3ddb9247 397
bb4bdbaf 398 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 399 string packet;
5c633640 400
5b0ddd18 401 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 402
9170fbaf 403 if(!ret || ret==-1) { // timeout
bb4bdbaf 404 t_fdm->removeWriteFD(sock->getHandle());
5c633640 405 }
50c81227
BH
406 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
407 return -1;
408 }
9170fbaf 409 return ret;
5c633640
BH
410}
411
d187038c 412static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 413
9170fbaf 414// -1 is error, 0 is timeout, 1 is success
a683e8bd 415int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 416{
50c81227 417 data.clear();
5c633640
BH
418 PacketID pident;
419 pident.sock=sock;
420 pident.inNeeded=len;
825fa717 421 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 422 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 423
bb4bdbaf 424 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 425 if(!ret || ret==-1) { // timeout
bb4bdbaf 426 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 427 }
50c81227
BH
428 else if(data.empty()) {// error, EOF or other
429 return -1;
430 }
431
9170fbaf 432 return ret;
288f4aa9
BH
433}
434
d187038c 435static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 436{
fba1e944 437 PacketID pident=*any_cast<PacketID>(&var);
4465e941 438 char resp[512];
7c77ce63
RG
439 ComboAddress fromaddr;
440 socklen_t addrlen=sizeof(fromaddr);
441
442 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
443 if (fromaddr != pident.remote) {
e6a9dde5 444 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
445
446 }
447
4465e941 448 t_fdm->removeReadFD(fd);
449 if(ret >= 0) {
a683e8bd 450 string data(resp, (size_t) ret);
fba1e944 451 MT->sendEvent(pident, &data);
4465e941 452 }
453 else {
fba1e944 454 string empty;
455 MT->sendEvent(pident, &empty);
04360367 456 // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
4465e941 457 }
458}
fba1e944 459string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 460{
4465e941 461 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
462 s.setNonBlocking();
463 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
464
465 s.bind(local);
466 s.connect(dest);
4465e941 467 s.send(query);
468
469 PacketID pident;
470 pident.sock=&s;
7c77ce63 471 pident.remote=dest;
4465e941 472 pident.type=0;
fba1e944 473 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 474
475 string data;
fba1e944 476
4465e941 477 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 478
4465e941 479 if(!ret || ret==-1) { // timeout
4465e941 480 t_fdm->removeReadFD(s.getHandle());
481 }
482 else if(data.empty()) {// error, EOF or other
fba1e944 483 // we could special case this
4465e941 484 return data;
485 }
4465e941 486 return data;
487}
488
d7dae798 489//! pick a random query local address
1652a63e 490ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 491{
1652a63e 492 ComboAddress ret;
5a38281c 493 if(family==AF_INET) {
3ddb9247 494 if(g_localQueryAddresses4.empty())
1652a63e 495 ret = g_local4;
3ddb9247 496 else
1652a63e
BH
497 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
498 ret.sin4.sin_port = htons(port);
5a38281c
BH
499 }
500 else {
501 if(g_localQueryAddresses6.empty())
1652a63e
BH
502 ret = g_local6;
503 else
504 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 505
1652a63e 506 ret.sin6.sin6_port = htons(port);
5a38281c 507 }
1652a63e 508 return ret;
5a38281c 509}
4ef015cd 510
d187038c 511static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 512
d187038c 513static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
514{
515 uint32_t psize=0;
516 socklen_t len=sizeof(psize);
3ddb9247 517
d7dae798 518 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 519 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 520 return;
d7dae798
BH
521 }
522
a2a81d42
OM
523 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0) {
524 int err = errno;
296ddbfe 525 g_log << Logger::Error << "Unable to raise socket buffer size to " << size << ": " << stringerror(err) << endl;
a2a81d42 526 }
d7dae798
BH
527}
528
529
530static void setSocketReceiveBuffer(int fd, uint32_t size)
531{
532 setSocketBuffer(fd, SO_RCVBUF, size);
533}
534
535static void setSocketSendBuffer(int fd, uint32_t size)
536{
537 setSocketBuffer(fd, SO_SNDBUF, size);
538}
539
540
4ef015cd
BH
541// you can ask this class for a UDP socket to send a query from
542// this socket is not yours, don't even think about deleting it
543// but after you call 'returnSocket' on it, don't assume anything anymore
544class UDPClientSocks
545{
4ef015cd 546 unsigned int d_numsocks;
4ef015cd 547public:
e2642526 548 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
549 {
550 }
551
2ee280cf 552 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 553 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 554 {
d8f6d49f
BH
555 *fd=makeClientSocket(toaddr.sin4.sin_family);
556 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 557 return -2;
d8f6d49f
BH
558
559 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
560 int err = errno;
a7b68ae7
RG
561 try {
562 closesocket(*fd);
563 }
564 catch(const PDNSException& e) {
e6a9dde5 565 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
566 }
567
d8f6d49f 568 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 569 return -2;
998a4334 570 return -1;
d8f6d49f 571 }
998a4334 572
998a4334 573 d_numsocks++;
d8f6d49f 574 return 0;
4ef015cd
BH
575 }
576
577 // return a socket to the pool, or simply erase it
2bee9b7c 578 void returnSocket(int fd)
4ef015cd 579 {
80baf329 580 try {
2bee9b7c 581 t_fdm->removeReadFD(fd);
80baf329 582 }
2bee9b7c 583 catch(const FDMultiplexerException& e) {
bb4bdbaf 584 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 585 }
2bee9b7c 586
a7b68ae7 587 try {
2bee9b7c 588 closesocket(fd);
a7b68ae7
RG
589 }
590 catch(const PDNSException& e) {
e6a9dde5 591 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 592 }
3ddb9247 593
998a4334 594 --d_numsocks;
4ef015cd 595 }
d8f6d49f 596
2bee9b7c
RG
597private:
598
d8f6d49f 599 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 600 static int makeClientSocket(int family)
d8f6d49f 601 {
a683e8bd 602 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 603
d8f6d49f
BH
604 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
605 return ret;
3ddb9247
PD
606
607 if(ret<0)
335da0ba 608 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 609
7eb73ffa 610 // setCloseOnExec(ret); // we're not going to exec
5a38281c 611
d8f6d49f 612 int tries=10;
3aa91c3e 613 ComboAddress sin;
d8f6d49f 614 while(--tries) {
1652a63e 615 uint16_t port;
3ddb9247 616
d8f6d49f 617 if(tries==1) // fall back to kernel 'random'
4957a608 618 port = 0;
bf6f28ca
CHB
619 else {
620 do {
621 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
622 }
623 while (s_avoidUdpSourcePorts.count(port));
624 }
5a38281c 625
3aa91c3e 626 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 627
3ddb9247 628 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 629 break;
d8f6d49f 630 }
9ec48f21
RG
631
632 if(!tries) {
633 closesocket(ret);
3aa91c3e 634 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
9ec48f21
RG
635 }
636
637 try {
638 setReceiveSocketErrors(ret, family);
639 setNonBlocking(ret);
640 }
641 catch(...) {
642 closesocket(ret);
643 throw;
644 }
3ddb9247 645
d8f6d49f
BH
646 return ret;
647 }
49a699c4
BH
648};
649
f26bf547 650static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 651
288f4aa9 652/* these two functions are used by LWRes */
34801ab1 653// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 654int asendto(const char *data, size_t len, int flags,
3ddb9247 655 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 656{
34801ab1
BH
657
658 PacketID pident;
787e5eab
BH
659 pident.domain = domain;
660 pident.remote = toaddr;
661 pident.type = qtype;
34801ab1
BH
662
663 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
664 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
665
666 for(; chain.first != chain.second; chain.first++) {
667 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 668 /*
4665c31e
BH
669 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
670 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 671 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 672 */
34801ab1
BH
673 chain.first->key.chain.insert(id); // we can chain
674 *fd=-1; // gets used in waitEvent / sendEvent later on
675 return 1;
676 }
677 }
678
49a699c4 679 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
680 if(ret < 0)
681 return ret;
34801ab1 682
998a4334
BH
683 pident.fd=*fd;
684 pident.id=id;
3ddb9247 685
bb4bdbaf
BH
686 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
687 ret = send(*fd, data, len, 0);
688
5b0ddd18 689 int tmp = errno;
bb4bdbaf 690
7302ed0a 691 if(ret < 0)
49a699c4 692 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 693
5b0ddd18 694 errno = tmp; // this is for logging purposes only
7302ed0a 695 return ret;
288f4aa9
BH
696}
697
9170fbaf 698// -1 is error, 0 is timeout, 1 is success
f128d20d 699int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 700 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 701{
0d5f0a9f 702 static optional<unsigned int> nearMissLimit;
3ddb9247 703 if(!nearMissLimit)
0d5f0a9f
BH
704 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
705
288f4aa9 706 PacketID pident;
4ef015cd 707 pident.fd=fd;
288f4aa9 708 pident.id=id;
0d5f0a9f 709 pident.domain=domain;
787e5eab 710 pident.type = qtype;
996c89cc 711 pident.remote=fromaddr;
b636533b 712
5b0ddd18 713 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 714
9ec48f21 715 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
9170fbaf 716 if(ret > 0) {
9ec48f21 717 /* handleUDPServerResponse() will close the socket for us no matter what */
996c89cc 718 if(packet.empty()) // means "error"
3ddb9247 719 return -1;
998a4334 720
a683e8bd 721 *d_len=packet.size();
f128d20d 722
0d5f0a9f 723 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 724 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 725 g_stats.spoofCount++;
35ce8576
BH
726 return -1;
727 }
288f4aa9 728 }
09e6702a 729 else {
9ec48f21 730 /* getting there means error or timeout, it's up to us to close the socket */
34801ab1 731 if(fd >= 0)
49a699c4 732 t_udpclientsocks->returnSocket(fd);
09e6702a 733 }
9170fbaf 734 return ret;
288f4aa9
BH
735}
736
88def049
BH
737static void writePid(void)
738{
191f2e47 739 if(!::arg().mustDo("write-pid"))
740 return;
18e7758c 741 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 742 if(of)
705f31ae 743 of<< Utility::getpid() <<endl;
a2a81d42
OM
744 else {
745 int err = errno;
746 g_log << Logger::Error << "Writing pid for " << Utility::getpid() << " to " << s_pidfname << " failed: "
296ddbfe 747 << stringerror(err) << endl;
a2a81d42 748 }
88def049
BH
749}
750
c51c551e
OM
751uint16_t TCPConnection::s_maxInFlight;
752
2749c3fe 753TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
754{
755 ++s_currentConnections;
cd989c87 756 (*t_tcpClientCounts)[d_remote]++;
0e408828 757}
cd989c87
BH
758
759TCPConnection::~TCPConnection()
0e408828 760{
a7b68ae7
RG
761 try {
762 if(closesocket(d_fd) < 0)
e6a9dde5 763 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
764 }
765 catch(const PDNSException& e) {
e6a9dde5 766 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
767 }
768
3ddb9247 769 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 770 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 771 --s_currentConnections;
0e408828 772}
0e9d9ce2 773
3ddb9247 774AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
775
776static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 777
92011b8f 778// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 779static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 780{
92011b8f 781 if(packetsize > 1000 && t_largeanswerremotes)
782 t_largeanswerremotes->push_back(remote);
2cc13433
BH
783 switch(res) {
784 case RCode::ServFail:
92011b8f 785 if(t_servfailremotes) {
786 t_servfailremotes->push_back(remote);
5af86fdc 787 if(query && t_servfailqueryring) // packet cache
92011b8f 788 t_servfailqueryring->push_back(make_pair(*query, qtype));
789 }
2cc13433
BH
790 g_stats.servFails++;
791 break;
792 case RCode::NXDomain:
793 g_stats.nxDomains++;
794 break;
795 case RCode::NoError:
796 g_stats.noErrors++;
797 break;
798 }
799}
800
9a864da4 801static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 802try
803{
5cc8371b 804 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 805}
806catch(...)
807{
808 return "Exception making error message for exception";
809}
810
aa7929a3 811#ifdef HAVE_PROTOBUF
0a6a45c8 812static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId, const std::string& deviceName)
aa7929a3 813{
b773359c
RG
814 if (!t_protobufServers) {
815 return;
816 }
817
e1c8a4bb 818 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
79816288 819 ComboAddress requestor = requestorNM.getMaskedNetwork();
5d2e9a83 820 requestor.setPort(remote.getPort());
e1c8a4bb 821 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 822 message.setServerIdentity(SyncRes::s_serverID);
d14121a8 823 message.setEDNSSubnet(ednssubnet, ednssubnet.isIPv4() ? maskV4 : maskV6);
67e31ebe 824 message.setRequestorId(requestorId);
590388d2 825 message.setDeviceId(deviceId);
0a6a45c8 826 message.setDeviceName(deviceName);
02b47f43 827
02b47f43 828 if (!policyTags.empty()) {
d9d3f9c1 829 message.setPolicyTags(policyTags);
02b47f43 830 }
aa7929a3 831
d9d3f9c1 832// cerr <<message.toDebugString()<<endl;
aa7929a3 833 std::string str;
d9d3f9c1 834 message.serialize(str);
b773359c
RG
835
836 for (auto& server : *t_protobufServers) {
837 server->queueData(str);
838 }
aa7929a3
RG
839}
840
b773359c 841static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 842{
b773359c
RG
843 if (!t_protobufServers) {
844 return;
845 }
846
d9d3f9c1 847// cerr <<message.toDebugString()<<endl;
aa7929a3 848 std::string str;
d9d3f9c1 849 message.serialize(str);
b773359c
RG
850
851 for (auto& server : *t_protobufServers) {
852 server->queueData(str);
853 }
aa7929a3
RG
854}
855#endif
856
53508135
PL
857/**
858 * Chases the CNAME provided by the PolicyCustom RPZ policy.
859 *
860 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
861 * @param qtype: The QType of the original query
862 * @param sr: A SyncRes
863 * @param res: An integer that will contain the RCODE of the lookup we do
864 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
865 */
d187038c 866static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
867{
868 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
869 bool oldWantsRPZ = sr.getWantsRPZ();
870 sr.setWantsRPZ(false);
53508135 871 vector<DNSRecord> ans;
6da513b2 872 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
873 for (const auto& rec : ans) {
874 if(rec.d_place == DNSResourceRecord::ANSWER) {
875 ret.push_back(rec);
876 }
877 }
878 // Reset the RPZ state of the SyncRes
30ee601a 879 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
880 }
881}
882
70fb28d9 883static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 884{
70fb28d9 885 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
886
887 if(rec.d_type != QType::OPT) // their TTL ain't real
888 minTTL = min(minTTL, rec.d_ttl);
889
890 rec.d_content->toPacket(pw);
891 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
892 pw.rollback();
893 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
894 pw.getHeader()->tc=1;
895 pw.truncate();
896 }
897 return false;
898 }
899
900 return true;
901}
902
63341e8d 903#ifdef HAVE_PROTOBUF
3fe06137 904static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 905{
3fe06137 906 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
907
908 for (const auto& server : config.servers) {
909 try {
5d6c7a46
RG
910 auto logger = make_unique<RemoteLogger>(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
911 logger->setLogQueries(config.logQueries);
912 logger->setLogResponses(config.logResponses);
913 result->emplace_back(std::move(logger));
b773359c
RG
914 }
915 catch(const std::exception& e) {
916 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
917 }
918 catch(const PDNSException& e) {
919 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
920 }
63341e8d
RG
921 }
922
923 return result;
924}
925
926static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
927{
928 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
929 if (t_protobufServers) {
930 for (auto& server : *t_protobufServers) {
931 server->stop();
932 }
933 t_protobufServers.reset();
63341e8d
RG
934 }
935
936 return false;
937 }
938
939 /* if the server was not running, or if it was running according to a
940 previous configuration */
b773359c
RG
941 if (!t_protobufServers ||
942 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 943
b773359c
RG
944 if (t_protobufServers) {
945 for (auto& server : *t_protobufServers) {
946 server->stop();
947 }
63341e8d 948 }
b773359c 949 t_protobufServers.reset();
63341e8d 950
b773359c
RG
951 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
952 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
953 }
954
955 return true;
956}
957
958static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
959{
960 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
961 if (t_outgoingProtobufServers) {
962 for (auto& server : *t_outgoingProtobufServers) {
963 server->stop();
964 }
63341e8d 965 }
b773359c 966 t_outgoingProtobufServers.reset();
63341e8d
RG
967
968 return false;
969 }
970
971 /* if the server was not running, or if it was running according to a
972 previous configuration */
b773359c
RG
973 if (!t_outgoingProtobufServers ||
974 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 975
b773359c
RG
976 if (t_outgoingProtobufServers) {
977 for (auto& server : *t_outgoingProtobufServers) {
978 server->stop();
979 }
63341e8d 980 }
b773359c 981 t_outgoingProtobufServers.reset();
63341e8d 982
b773359c
RG
983 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
984 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
985 }
986
987 return true;
988}
b9fa43e0
OM
989
990#ifdef HAVE_FSTRM
991
10ba6d01 992static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
b9fa43e0 993{
10ba6d01 994 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
b9fa43e0
OM
995
996 for (const auto& server : config.servers) {
997 try {
573f4ff0
OM
998 std::unordered_map<string,unsigned> options;
999 options["bufferHint"] = config.bufferHint;
1000 options["flushTimeout"] = config.flushTimeout;
1001 options["inputQueueSize"] = config.inputQueueSize;
1002 options["outputQueueSize"] = config.outputQueueSize;
1003 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
1004 options["reopenInterval"] = config.reopenInterval;
dea8a6bc
OM
1005 FrameStreamLogger *fsl = nullptr;
1006 try {
1007 ComboAddress address(server);
1008 fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
1009 }
1010 catch (const PDNSException& e) {
1011 fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
1012 }
573f4ff0
OM
1013 fsl->setLogQueries(config.logQueries);
1014 fsl->setLogResponses(config.logResponses);
1015 result->emplace_back(fsl);
b9fa43e0
OM
1016 }
1017 catch(const std::exception& e) {
1018 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
1019 }
1020 catch(const PDNSException& e) {
1021 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
1022 }
1023 }
1024
1025 return result;
1026}
1027
1028static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1029{
1030 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1031 if (t_frameStreamServers) {
1032 // dt's take care of cleanup
1033 t_frameStreamServers.reset();
1034 }
1035
1036 return false;
1037 }
1038
1039 /* if the server was not running, or if it was running according to a
1040 previous configuration */
1041 if (!t_frameStreamServers ||
1042 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1043
1044 if (t_frameStreamServers) {
1045 // dt's take care of cleanup
1046 t_frameStreamServers.reset();
1047 }
1048
1049 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1050 t_frameStreamServersGeneration = luaconfsLocal->generation;
1051 }
1052
1053 return true;
1054}
1055#endif /* HAVE_FSTRM */
63341e8d
RG
1056#endif /* HAVE_PROTOBUF */
1057
af1377b7 1058#ifdef NOD_ENABLED
41c542ec 1059static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
1060{
1061 static const QType qt(QType::A);
1062 static const uint16_t qc(QClass::IN);
41c542ec 1063 bool ret = false;
af1377b7
NC
1064 // First check the (sub)domain isn't whitelisted for NOD purposes
1065 if (!g_nodDomainWL.check(dname)) {
1066 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
1067 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1068 if (g_nodLog) {
1069 // This should probably log to a dedicated log file
1070 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
1071 }
1072 if (!(g_nodLookupDomain.isRoot())) {
1073 // Send a DNS A query to <domain>.g_nodLookupDomain
1074 DNSName qname = dname;
1075 vector<DNSRecord> dummy;
1076 qname += g_nodLookupDomain;
1077 directResolve(qname, qt, qc, dummy);
1078 }
41c542ec 1079 ret = true;
af1377b7
NC
1080 }
1081 }
41c542ec 1082 return ret;
af1377b7
NC
1083}
1084
41c542ec
NC
1085static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1086{
1087 bool ret = false;
1088 if (record.d_place == DNSResourceRecord::ANSWER ||
1089 record.d_place == DNSResourceRecord::ADDITIONAL) {
1090 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1091 std::stringstream ss;
1092 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1093 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1094 if (g_udrLog) {
1095 // This should also probably log to a dedicated file.
1096 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1097 }
1098 ret = true;
1099 }
1100 }
1101 return ret;
1102}
af1377b7
NC
1103#endif /* NOD_ENABLED */
1104
37a919d4
RG
1105int followCNAMERecords(vector<DNSRecord>& ret, const QType& qtype)
1106{
1107 vector<DNSRecord> resolved;
1108 DNSName target;
1109 for(const DNSRecord& rr : ret) {
1110 if(rr.d_type == QType::CNAME) {
1111 auto rec = getRR<CNAMERecordContent>(rr);
1112 if(rec) {
1113 target=rec->getTarget();
1114 break;
1115 }
1116 }
1117 }
1118
1119 if(target.empty()) {
1120 return 0;
1121 }
1122
1123 int rcode = directResolve(target, qtype, QClass::IN, resolved);
1124
1125 for(DNSRecord& rr : resolved) {
1126 ret.push_back(std::move(rr));
1127 }
1128 return rcode;
1129}
1130
d187038c 1131static void startDoResolve(void *p)
288f4aa9 1132{
9a864da4 1133 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1134 try {
5af86fdc
RG
1135 if (t_queryring)
1136 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1137
32015748 1138 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1139 EDNSOpts edo;
5164bac3 1140 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1141 bool variableAnswer = dc->d_variable;
8e079f3a 1142 bool haveEDNS=false;
ca2526f5
NC
1143#ifdef NOD_ENABLED
1144 bool hasUDR = false;
1145#endif /* NOD_ENABLED */
f1db0de2
PL
1146 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1147 uint8_t ednsExtRCode = 0;
8e079f3a 1148 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1149 haveEDNS=true;
1150 if (edo.d_version != 0) {
1151 ednsExtRCode = ERCode::BADVERS;
1152 }
1153
32015748
RG
1154 if(!dc->d_tcp) {
1155 /* rfc6891 6.2.3:
1156 "Values lower than 512 MUST be treated as equal to 512."
1157 */
1158 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1159 }
5164bac3 1160 ednsOpts = edo.d_options;
3af35968 1161 maxanswersize -= 11; // EDNS header size
b40562da 1162
1f691b94
PL
1163 for (const auto& o : edo.d_options) {
1164 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1165 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1166 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1167 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1168 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1169 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1170 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1171 variableAnswer = true; // Can't packetcache an answer with NSID
1172 // Option Code and Option Length are both 2
1173 maxanswersize -= 2 + 2 + mode_server_id.size();
1174 }
b40562da
RG
1175 }
1176 }
10321a98 1177 }
b40562da
RG
1178 /* perhaps there was no EDNS or no ECS but by now we looked */
1179 dc->d_ecsParsed = true;
e325f20c 1180 vector<DNSRecord> ret;
ea634573 1181 vector<uint8_t> packet;
b23b8614 1182
ad42489c 1183 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1184 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1185 bool wantsRPZ(true);
1fbc6dc5 1186 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
aa7929a3 1187#ifdef HAVE_PROTOBUF
63341e8d 1188 if (checkProtobufExport(luaconfsLocal)) {
5cc8371b 1189 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
5d2e9a83
RG
1190 ComboAddress requestor = requestorNM.getMaskedNetwork();
1191 requestor.setPort(dc->d_source.getPort());
0bd2e252 1192 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1193 pbMessage->setServerIdentity(SyncRes::s_serverID);
d14121a8 1194 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1195 }
1196#endif /* HAVE_PROTOBUF */
ad42489c 1197
b9fa43e0
OM
1198#ifdef HAVE_FSTRM
1199 checkFrameStreamExport(luaconfsLocal);
1200#endif
1201
3ddb9247 1202 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1203
1204 pw.getHeader()->aa=0;
1205 pw.getHeader()->ra=1;
c154c8a4 1206 pw.getHeader()->qr=1;
bb4bdbaf 1207 pw.getHeader()->tc=0;
ea634573 1208 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1209 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1210 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1211
70fb28d9
RG
1212 /* This is the lowest TTL seen in the records of the response,
1213 so we can't cache it for longer than this value.
1214 If we have a TTL cap, this value can't be larger than the
1215 cap no matter what. */
1216 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1217
1218 SyncRes sr(dc->d_now);
37a919d4 1219 sr.setId(MT->getTid());
0c43f455 1220
2e921ec6 1221 bool DNSSECOK=false;
3457a2a0 1222 if(t_pdl) {
f26bf547 1223 sr.setLuaEngine(t_pdl);
3457a2a0 1224 }
9eec8c98 1225 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1226 sr.setDoDNSSEC(true);
9eec8c98
PL
1227
1228 // Does the requestor want DNSSEC records?
d6c335ab 1229 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1230 DNSSECOK=true;
1231 g_stats.dnssecQueries++;
1232 }
88c33dca
RG
1233 if (dc->d_mdp.d_header.cd) {
1234 /* Per rfc6840 section 5.9, "When processing a request with
1235 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1236 to return all response data, even data that has failed DNSSEC
1237 validation. */
1238 ++g_stats.dnssecCheckDisabledQueries;
1239 }
1240 if (dc->d_mdp.d_header.ad) {
1241 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1242 indicating that the requester understands and is interested in the
1243 value of the AD bit in the response. This allows a requester to
1244 indicate that it understands the AD bit without also requesting
1245 DNSSEC data via the DO bit. */
1246 ++g_stats.dnssecAuthenticDataQueries;
1247 }
9eec8c98
PL
1248 } else {
1249 // Ignore the client-set CD flag
1250 pw.getHeader()->cd=0;
5b9853c9 1251 }
0c43f455
RG
1252 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1253
4898a348 1254#ifdef HAVE_PROTOBUF
30ee601a 1255 sr.setInitialRequestId(dc->d_uuid);
b773359c 1256 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1257#endif
b9fa43e0
OM
1258#ifdef HAVE_FSTRM
1259 sr.setFrameStreamServers(t_frameStreamServers);
1260#endif
2fe3354d 1261 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1262
904d3219 1263 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1264 bool shouldNotValidate = false;
904d3219 1265
ef3b6cd7
RG
1266 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1267 int res = RCode::NoError;
37a919d4 1268
1f1ca368 1269 DNSFilterEngine::Policy appliedPolicy;
6da513b2 1270 std::vector<DNSRecord> spoofed;
406b722e 1271 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, dc->d_logResponse);
d6c335ab 1272 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1273 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1274 dq.tag = dc->d_tag;
1275 dq.discardedPolicies = &sr.d_discardedPolicies;
1276 dq.policyTags = &dc->d_policyTags;
1277 dq.appliedPolicy = &appliedPolicy;
1278 dq.currentRecords = &ret;
1279 dq.dh = &dc->d_mdp.d_header;
05c74122 1280 dq.data = dc->d_data;
67e31ebe
RG
1281#ifdef HAVE_PROTOBUF
1282 dq.requestorId = dc->d_requestorId;
590388d2 1283 dq.deviceId = dc->d_deviceId;
0a6a45c8 1284 dq.deviceName = dc->d_deviceName;
67e31ebe 1285#endif
ba21fcfe 1286
6cf96227
PL
1287 if(ednsExtRCode != 0) {
1288 goto sendit;
1289 }
1290
e661a20b 1291 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1292 pw.getHeader()->tc = 1;
1293 res = 0;
1294 variableAnswer = true;
e661a20b
PD
1295 goto sendit;
1296 }
1297
f26bf547 1298 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1299 sr.setLogMode(SyncRes::Store);
1300 tracedQuery=true;
1301 }
3ddb9247 1302
976ec823 1303 if(!g_quiet || tracedQuery) {
e6a9dde5 1304 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1305 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1306 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1307 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1308 }
e6a9dde5 1309 g_log<<endl;
976ec823 1310 }
c75a6a9e 1311
37a919d4 1312 if(!dc->d_mdp.d_header.rd) {
c836dc19 1313 sr.setCacheOnly();
37a919d4
RG
1314 }
1315
1316 if (dc->d_rcode != boost::none) {
1317 /* we have a response ready to go, most likely from gettag_ffi */
1318 ret = std::move(dc->d_records);
1319 res = *dc->d_rcode;
1320 if (res == RCode::NoError && dc->d_followCNAMERecords) {
1321 res = followCNAMERecords(ret, QType(dc->d_mdp.d_qtype));
1322 }
1323 goto haveAnswer;
1324 }
c836dc19 1325
f26bf547
RG
1326 if (t_pdl) {
1327 t_pdl->prerpz(dq, res);
0a273054
RG
1328 }
1329
db486de5 1330 // Check if the query has a policy attached to it
e37e5795 1331 if (wantsRPZ && (appliedPolicy.d_type == DNSFilterEngine::PolicyType::None || appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction)) {
2996400c 1332 luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies, appliedPolicy);
0a273054 1333 }
644dd1da 1334
54be222b 1335 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1336 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1337
30ee601a 1338 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1339 if(wantsRPZ) {
1340 switch(appliedPolicy.d_kind) {
1341 case DNSFilterEngine::PolicyKind::NoAction:
1342 break;
1343 case DNSFilterEngine::PolicyKind::Drop:
1344 g_stats.policyDrops++;
7a25883a 1345 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1346 return;
1347 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1348 g_stats.policyResults[appliedPolicy.d_kind]++;
1349 res=RCode::NXDomain;
1350 goto haveAnswer;
1351 case DNSFilterEngine::PolicyKind::NODATA:
1352 g_stats.policyResults[appliedPolicy.d_kind]++;
1353 res=RCode::NoError;
db486de5 1354 goto haveAnswer;
b8470add
PL
1355 case DNSFilterEngine::PolicyKind::Custom:
1356 g_stats.policyResults[appliedPolicy.d_kind]++;
1357 res=RCode::NoError;
6da513b2
RG
1358 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1359 for (const auto& dr : spoofed) {
1360 ret.push_back(dr);
1361 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1362 }
b8470add
PL
1363 goto haveAnswer;
1364 case DNSFilterEngine::PolicyKind::Truncate:
1365 if(!dc->d_tcp) {
1366 g_stats.policyResults[appliedPolicy.d_kind]++;
1367 res=RCode::NoError;
1368 pw.getHeader()->tc=1;
1369 goto haveAnswer;
1370 }
1371 break;
1372 }
db486de5
PL
1373 }
1374
b8470add 1375 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0 1376 try {
124dd1d4 1377 sr.d_appliedPolicy = appliedPolicy;
44971ca0 1378 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1379 shouldNotValidate = sr.wasOutOfBand();
44971ca0 1380 }
124dd1d4
RG
1381 catch(const ImmediateServFailException &e) {
1382 if(g_logCommonErrors) {
e6a9dde5 1383 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
124dd1d4 1384 }
44971ca0
PD
1385 res = RCode::ServFail;
1386 }
124dd1d4
RG
1387 catch(const PolicyHitException& e) {
1388 res = -2;
1389 }
1921a4c2 1390 dq.validationState = sr.getValidationState();
2996400c 1391 appliedPolicy = sr.d_appliedPolicy;
1921a4c2 1392
b8470add
PL
1393 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1394 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1395 appliedPolicy = sr.d_appliedPolicy;
1396 g_stats.policyResults[appliedPolicy.d_kind]++;
1397 switch(appliedPolicy.d_kind) {
1398 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1399 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1400 case DNSFilterEngine::PolicyKind::Drop:
1401 g_stats.policyDrops++;
b8470add
PL
1402 return;
1403 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1404 ret.clear();
1405 res=RCode::NXDomain;
1406 goto haveAnswer;
1407
1408 case DNSFilterEngine::PolicyKind::NODATA:
1409 ret.clear();
1410 res=RCode::NoError;
1411 goto haveAnswer;
1412
1413 case DNSFilterEngine::PolicyKind::Truncate:
1414 if(!dc->d_tcp) {
1415 ret.clear();
1416 res=RCode::NoError;
1417 pw.getHeader()->tc=1;
1418 goto haveAnswer;
1419 }
1420 break;
1421
1422 case DNSFilterEngine::PolicyKind::Custom:
1423 ret.clear();
1424 res=RCode::NoError;
6da513b2
RG
1425 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1426 for (const auto& dr : spoofed) {
1427 ret.push_back(dr);
1428 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1429 }
b8470add
PL
1430 goto haveAnswer;
1431 }
1432 }
1433
e37e5795 1434 if (wantsRPZ && (appliedPolicy.d_type == DNSFilterEngine::PolicyType::None || appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction)) {
2996400c 1435 luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies, appliedPolicy);
b8470add 1436 }
db486de5 1437
f26bf547 1438 if(t_pdl) {
db486de5
PL
1439 if(res == RCode::NoError) {
1440 auto i=ret.cbegin();
1441 for(; i!= ret.cend(); ++i)
1442 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1443 break;
f26bf547 1444 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1445 shouldNotValidate = true;
1446
db486de5 1447 }
f26bf547 1448 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1449 shouldNotValidate = true;
db486de5 1450
f26bf547 1451 if(t_pdl->postresolve(dq, res))
3ca4e735 1452 shouldNotValidate = true;
db486de5
PL
1453 }
1454
b8470add
PL
1455 if (wantsRPZ) { //XXX This block is repeated, see above
1456 g_stats.policyResults[appliedPolicy.d_kind]++;
1457 switch(appliedPolicy.d_kind) {
1458 case DNSFilterEngine::PolicyKind::NoAction:
1459 break;
1460 case DNSFilterEngine::PolicyKind::Drop:
1461 g_stats.policyDrops++;
b8470add
PL
1462 return;
1463 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1464 ret.clear();
1465 res=RCode::NXDomain;
1466 goto haveAnswer;
1467
1468 case DNSFilterEngine::PolicyKind::NODATA:
1469 ret.clear();
1470 res=RCode::NoError;
1471 goto haveAnswer;
1472
1473 case DNSFilterEngine::PolicyKind::Truncate:
1474 if(!dc->d_tcp) {
1475 ret.clear();
1476 res=RCode::NoError;
1477 pw.getHeader()->tc=1;
1478 goto haveAnswer;
1479 }
1480 break;
1481
1482 case DNSFilterEngine::PolicyKind::Custom:
1483 ret.clear();
1484 res=RCode::NoError;
6da513b2
RG
1485 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1486 for (const auto& dr : spoofed) {
1487 ret.push_back(dr);
1488 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1489 }
b8470add
PL
1490 goto haveAnswer;
1491 }
644dd1da 1492 }
4485aa35 1493 }
644dd1da 1494 haveAnswer:;
3e8216c8 1495 if(res == PolicyDecision::DROP) {
e9c2ad3a 1496 g_stats.policyDrops++;
ae7e77ad 1497 return;
3ddb9247 1498 }
9cdfab64 1499 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1500 {
85ffbc53
PD
1501 string trace(sr.getTrace());
1502 if(!trace.empty()) {
1503 vector<string> lines;
1504 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1505 for(const string& line : lines) {
85ffbc53 1506 if(!line.empty())
e6a9dde5 1507 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1508 }
1509 }
1510 }
3ddb9247 1511
9cdfab64 1512 if(res == -1) {
0fe1d080
PD
1513 pw.getHeader()->rcode=RCode::ServFail;
1514 // no commit here, because no record
1515 g_stats.servFails++;
1516 }
288f4aa9 1517 else {
ea634573 1518 pw.getHeader()->rcode=res;
92011b8f 1519
f3fe4ae6 1520 // Does the validation mode or query demand validation?
0c43f455 1521 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1522 try {
f3fe4ae6 1523 if(sr.doLog()) {
e6a9dde5 1524 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1525 }
4d2be65d
RG
1526
1527 auto state = sr.getValidationState();
1528
b25cae9a 1529 if(state == Secure) {
2e921ec6 1530 if(sr.doLog()) {
e6a9dde5 1531 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1532 }
b25cae9a 1533
1534 // Is the query source interested in the value of the ad-bit?
885c8881 1535 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1536 pw.getHeader()->ad=1;
1537 }
1538 else if(state == Insecure) {
f3fe4ae6 1539 if(sr.doLog()) {
e6a9dde5 1540 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1541 }
b25cae9a 1542
1543 pw.getHeader()->ad=0;
f3fe4ae6 1544 }
b25cae9a 1545 else if(state == Bogus) {
66f2e6ad
KM
1546 if(t_bogusremotes)
1547 t_bogusremotes->push_back(dc->d_source);
1548 if(t_bogusqueryring)
1549 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1550 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1551 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1552 }
1553
1554 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1555 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1556 if(sr.doLog()) {
e6a9dde5 1557 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1558 }
1559
1560 pw.getHeader()->rcode=RCode::ServFail;
1561 goto sendit;
1562 } else {
1563 if(sr.doLog()) {
e6a9dde5 1564 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1565 }
1566 }
1567 }
1568 }
124dd1d4 1569 catch(const ImmediateServFailException &e) {
b25cae9a 1570 if(g_logCommonErrors)
e6a9dde5 1571 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1572 pw.getHeader()->rcode=RCode::ServFail;
1573 goto sendit;
f3fe4ae6 1574 }
b3f0ed10 1575 }
1576
c154c8a4 1577 if(ret.size()) {
92476c8b 1578 orderAndShuffle(ret);
5cc8371b 1579 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1580 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1581 variableAnswer=true;
1582 }
8e079f3a 1583 }
0afa32d4
RG
1584
1585 bool needCommit = false;
8e079f3a 1586 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1587 if( ! DNSSECOK &&
1588 ( i->d_type == QType::NSEC3 ||
1589 (
1590 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1591 (
1592 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1593 i->d_place != DNSResourceRecord::ANSWER
1594 )
1595 )
1596 )
1597 ) {
2e921ec6 1598 continue;
3e80ebce
KM
1599 }
1600
70fb28d9 1601 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1602 needCommit = false;
1603 break;
1604 }
1605 needCommit = true;
1606
41c542ec
NC
1607#ifdef NOD_ENABLED
1608 bool udr = false;
1609 if (g_udrEnabled) {
1610 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1611 if (!hasUDR && udr)
1612 hasUDR = true;
41c542ec
NC
1613 }
1614#endif /* NOD ENABLED */
1615
aa7929a3 1616#ifdef HAVE_PROTOBUF
b773359c 1617 if (t_protobufServers) {
41c542ec
NC
1618#ifdef NOD_ENABLED
1619 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1620#else
0bd2e252 1621 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1622#endif /* NOD_ENABLED */
aa7929a3
RG
1623 }
1624#endif
ea634573 1625 }
0afa32d4 1626 if(needCommit)
8e079f3a 1627 pw.commit();
288f4aa9 1628 }
10321a98 1629 sendit:;
b3f0ed10 1630
a0ddd130 1631 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
9837850d 1632 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
5a7f99b4 1633 EDNSSubnetOpts eo;
1634 eo.source = dc->d_ednssubnet.source;
1635 ComboAddress sa;
1ef18cab 1636 sa.reset();
5a7f99b4 1637 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1638 eo.scope = Netmask(sa, 0);
1639
1640 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1641 }
1642
97c6d7e5
RG
1643 if (haveEDNS) {
1644 /* we try to add the EDNS OPT RR even for truncated answers,
1645 as rfc6891 states:
1646 "The minimal response MUST be the DNS header, question section, and an
1647 OPT record. This MUST also occur when a truncated response (using
1648 the DNS header's TC bit) is returned."
1649 */
9b60fb71 1650 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1651 pw.commit();
97c6d7e5
RG
1652 }
1653
79332bff 1654 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1655 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1656#ifdef NOD_ENABLED
1657 bool nod = false;
1658 if (g_nodEnabled) {
1659 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1660 nod = true;
1661 }
1662#endif /* NOD_ENABLED */
aa7929a3 1663#ifdef HAVE_PROTOBUF
406b722e 1664 if (t_protobufServers && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1665 pbMessage->setBytes(packet.size());
1666 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1667 if (appliedPolicy.d_name) {
d362f7c1
RG
1668 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1669 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1670 }
d362f7c1 1671 pbMessage->setPolicyTags(dc->d_policyTags);
c29d820c
RG
1672 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1673 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1674 }
1675 else {
1676 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1677 }
d362f7c1
RG
1678 pbMessage->setRequestorId(dq.requestorId);
1679 pbMessage->setDeviceId(dq.deviceId);
0a6a45c8 1680 pbMessage->setDeviceName(dq.deviceName);
41c542ec
NC
1681#ifdef NOD_ENABLED
1682 if (g_nodEnabled) {
ca2526f5 1683 if (nod) {
41c542ec 1684 pbMessage->setNOD(true);
ca2526f5
NC
1685 pbMessage->addPolicyTag(g_nod_pbtag);
1686 }
1687 if (hasUDR) {
1688 pbMessage->addPolicyTag(g_udr_pbtag);
1689 }
41c542ec
NC
1690 }
1691#endif /* NOD_ENABLED */
406b722e
RG
1692 if (dc->d_logResponse) {
1693 protobufLogResponse(*pbMessage);
1694 }
ac238ea7 1695#ifdef NOD_ENABLED
ca2526f5
NC
1696 if (g_nodEnabled) {
1697 pbMessage->setNOD(false);
1698 pbMessage->clearUDR();
1699 if (nod)
1700 pbMessage->removePolicyTag(g_nod_pbtag);
1701 if (hasUDR)
1702 pbMessage->removePolicyTag(g_udr_pbtag);
1703 }
ac238ea7 1704#endif /* NOD_ENABLED */
aa7929a3
RG
1705 }
1706#endif
ea634573 1707 if(!dc->d_tcp) {
b71b60ee 1708 struct msghdr msgh;
1709 struct iovec iov;
7bec330a
OM
1710 cmsgbuf_aligned cbuf;
1711 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1712 msgh.msg_control=NULL;
1713
cbc03320 1714 if(g_fromtosockets.count(dc->d_socket)) {
4272d071 1715 addCMsgSrcAddr(&msgh, &cbuf, &dc->d_local, 0);
2c0af54f 1716 }
a2a81d42
OM
1717 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors) {
1718 int err = errno;
1719 g_log << Logger::Warning << "Sending UDP reply to client " << dc->getRemote() << " failed with: "
1720 << strerror(err) << endl;
1721 }
70fb28d9 1722
49dc532e 1723 if(variableAnswer || sr.wasVariable()) {
1ef18cab 1724 g_stats.variableResponses++;
49dc532e 1725 }
3762e821 1726 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1727 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1728 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1729 g_now.tv_sec,
76e2b9e3 1730 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1731 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1732 dq.validationState,
08b02366
RG
1733 dc->d_ecsBegin,
1734 dc->d_ecsEnd,
4b0bdd5f 1735 std::move(pbMessage));
1051f8a9 1736 }
3762e821 1737 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1738 }
9c495589
BH
1739 else {
1740 char buf[2];
ea634573
BH
1741 buf[0]=packet.size()/256;
1742 buf[1]=packet.size()%256;
feccc9fc 1743
c038218b 1744 Utility::iovec iov[2];
feccc9fc 1745
ea634573
BH
1746 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1747 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1748
dd079764 1749 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1750 bool hadError=true;
feccc9fc 1751
dd079764 1752 if(wret == 0)
e6a9dde5 1753 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
a2a81d42
OM
1754 else if(wret < 0 ) {
1755 int err = errno;
1756 g_log << Logger::Error << "Error writing TCP answer to " << dc->getRemote() << ": " << strerror(err) << endl;
1757 } else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1758 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1759 else
18af64a8 1760 hadError=false;
3ddb9247 1761
b5b94beb 1762 // update tcp connection status, closing if needed and doing the fd multiplexer accounting
c51c551e
OM
1763 if (dc->d_tcpConnection->d_requestsInFlight > 0) {
1764 dc->d_tcpConnection->d_requestsInFlight--;
1765 }
3ddb9247 1766
b5b94beb
OM
1767 // In the code below, we try to remove the fd from the set, but
1768 // we don't know if another mthread already did the remove, so we can get a
1769 // "Tried to remove unlisted fd" exception. Note that an inflight < limit test
1770 // will not work since we do not know if the other mthread got an error or not.
09e6702a 1771 if(hadError) {
b5b94beb
OM
1772 try {
1773 t_fdm->removeReadFD(dc->d_socket);
1774 }
1775 catch (FDMultiplexerException &) {
1776 }
c36bc97a 1777 dc->d_socket = -1;
09e6702a 1778 }
a6ae6414 1779 else {
fde296a3
RG
1780 dc->d_tcpConnection->queriesCount++;
1781 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
b5b94beb
OM
1782 try {
1783 t_fdm->removeReadFD(dc->d_socket);
1784 }
1785 catch (FDMultiplexerException &) {
1786 }
fde296a3
RG
1787 dc->d_socket = -1;
1788 }
1789 else {
fde296a3 1790 Utility::gettimeofday(&g_now, 0); // needs to be updated
27ae2e3c 1791 struct timeval ttd = g_now;
c51c551e
OM
1792 // If we cross from max to max-1 in flight requests, the fd was not listened to, add it back
1793 if (dc->d_tcpConnection->d_requestsInFlight == TCPConnection::s_maxInFlight - 1) {
3cabb750
OM
1794 // A read error might have happened. If we add the fd back, it will most likely error again.
1795 // This is not a big issue, the next handleTCPClientReadable() will see another read error
1796 // and take action.
d5c6ec95
OM
1797 ttd.tv_sec += g_tcpTimeout;
1798 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
1799 } else {
3cabb750
OM
1800 // fd might have been removed by read error code, so expect an exception
1801 try {
1802 t_fdm->setReadTTD(dc->d_socket, ttd, g_tcpTimeout);
1803 }
1804 catch (FDMultiplexerException &) {
1805 }
d5c6ec95 1806 }
fde296a3 1807 }
0e9d9ce2 1808 }
9c495589 1809 }
2c9119cd 1810 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1811 if(!g_quiet) {
e6a9dde5
PL
1812 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1813 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1814 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1815 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1816
1817 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1818 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1819 }
1820
e6a9dde5 1821 g_log<<endl;
2c9119cd 1822
c75a6a9e 1823 }
b23b8614 1824
f7b8cffa 1825 if (sr.d_outqueries || sr.d_authzonequeries) {
a7956123 1826 s_RC->cacheMisses++;
f7b8cffa
RG
1827 }
1828 else {
a7956123 1829 s_RC->cacheHits++;
f7b8cffa 1830 }
2c9119cd 1831
fe213470
BH
1832 if(spent < 0.001)
1833 g_stats.answers0_1++;
1834 else if(spent < 0.010)
1835 g_stats.answers1_10++;
1836 else if(spent < 0.1)
1837 g_stats.answers10_100++;
1838 else if(spent < 1.0)
1839 g_stats.answers100_1000++;
1840 else
1841 g_stats.answersSlow++;
1842
574af7ea 1843 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1844 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1845 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1846 // no worries, we do this for packet cache hits elsewhere
19178da9 1847
1848 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1849 if(ourtime < 1)
1850 g_stats.ourtime0_1++;
1851 else if(ourtime < 2)
1852 g_stats.ourtime1_2++;
1853 else if(ourtime < 4)
1854 g_stats.ourtime2_4++;
1855 else if(ourtime < 8)
1856 g_stats.ourtime4_8++;
1857 else if(ourtime < 16)
1858 g_stats.ourtime8_16++;
1859 else if(ourtime < 32)
1860 g_stats.ourtime16_32++;
1861 else {
1862 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1863 g_stats.ourtimeSlow++;
1864 }
042da1a1 1865 if(ourtime >= 0.0) {
1866 newLat=ourtime*1000; // usec
1867 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1868 }
c6d04bdc 1869 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1870 }
3f81d239 1871 catch(PDNSException &ae) {
e6a9dde5 1872 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1873 }
16ce7f18
JS
1874 catch(const MOADNSException &mde) {
1875 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1876 }
fdbf35ac 1877 catch(std::exception& e) {
e6a9dde5 1878 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1879
1880 // Luawrapper nests the exception from Lua, so we unnest it here
1881 try {
1882 std::rethrow_if_nested(e);
2010ac95 1883 } catch(const std::exception& ne) {
e6a9dde5 1884 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1885 } catch(...) {}
1886
e6a9dde5 1887 g_log<<endl;
c154c8a4 1888 }
288f4aa9 1889 catch(...) {
e6a9dde5 1890 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1891 }
3ddb9247 1892
ec6eacbc 1893 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1894}
1895
d187038c 1896static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1897{
2d733c0f 1898 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1899 if(processNum >= 0)
335da0ba 1900 sockname += "."+std::to_string(processNum);
677e2a46 1901 sockname+=".controlsocket";
41f7a068 1902 s_rcc.listen(sockname);
3ddb9247 1903
387de317
BH
1904 int sockowner = -1;
1905 int sockgroup = -1;
1906
1907 if (!::arg().isEmpty("socket-group"))
1908 sockgroup=::arg().asGid("socket-group");
1909 if (!::arg().isEmpty("socket-owner"))
1910 sockowner=::arg().asUid("socket-owner");
3ddb9247 1911
f838ad8d
BH
1912 if (sockgroup > -1 || sockowner > -1) {
1913 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1914 unixDie("Failed to chown control socket");
1915 }
1916 }
387de317
BH
1917
1918 // do mode change if socket-mode is given
1919 if(!::arg().isEmpty("socket-mode")) {
1920 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1921 if(chmod(sockname.c_str(), sockmode) < 0) {
1922 unixDie("Failed to chmod control socket");
1923 }
387de317 1924 }
1d5b3ce6
BH
1925}
1926
5cc8371b 1927static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1928 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1929 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1930{
59cb4a79 1931 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1932 const bool lookForECS = ednssubnet != nullptr;
1933 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1934 size_t questionLen = question.length();
1935 unsigned int consumed=0;
1936 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1937
1938 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1939 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1940 const uint16_t arcount = ntohs(dh->arcount);
1941
1942 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1943 if (question.at(pos) != 0) {
1944 /* not an OPT or a XPF, bye. */
1945 return;
1946 }
1947
1948 pos += 1;
1949 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1950 pos += sizeof(dnsrecordheader);
1951
1952 if (pos >= questionLen) {
1953 return;
1954 }
1955
02b47f43 1956 /* OPT root label (1) followed by type (2) */
5cc8371b 1957 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1958 if (!options) {
1959 char* ecsStart = nullptr;
1960 size_t ecsLen = 0;
5cc8371b
RG
1961 /* we need to pass the record len */
1962 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1963 if (res == 0 && ecsLen > 4) {
1964 EDNSSubnetOpts eso;
1965 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1966 *ednssubnet=eso;
5cc8371b 1967 foundECS = true;
00b8cadc
RG
1968 }
1969 }
1970 }
1971 else {
5cc8371b
RG
1972 /* we need to pass the record len */
1973 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1974 if (res == 0) {
1975 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1976 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1977 EDNSSubnetOpts eso;
29e6303a 1978 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1979 *ednssubnet=eso;
5cc8371b 1980 foundECS = true;
00b8cadc
RG
1981 }
1982 }
02b47f43
RG
1983 }
1984 }
1985 }
59cb4a79 1986 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1987 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1988 return;
1989 }
1990
1991 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1992 }
1993
1994 pos += ntohs(drh->d_clen);
02b47f43
RG
1995 }
1996}
1997
// Read callback for an already accepted client TCP connection. `var` carries the
// shared_ptr<TCPConnection> for the socket. The connection is read through a small
// state machine: BYTE0 (nothing of the 2-byte length prefix read yet), BYTE1 (one
// length byte read) and GETQUESTION (reading the question body). Once qlen bytes
// have been read the question is wrapped in a DNSComboWriter, optionally tagged
// and logged, filtered, and finally handed to startDoResolve in a new mthread.
d187038c 1998static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1999{
cd989c87 2000 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 2001
// BYTE0: try to read both bytes of the length prefix in one go.
// NOTE(review): this writes up to 2 bytes at conn->data[0], while data is resized to
// qlen further down; presumably data is guaranteed to hold >= 2 bytes when we get
// here -- confirm for a client announcing qlen < 2.
879b3f70 2002 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 2003 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 2004 if(bytes==1)
667f7e60 2005 conn->state=TCPConnection::BYTE1;
3ddb9247 2006 if(bytes==2) {
// the length prefix is assembled high byte first
a0aa4f64 2007 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 2008 conn->data.resize(conn->qlen);
667f7e60
BH
2009 conn->bytesread=0;
2010 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
2011 }
// recv() returned 0 or an error: stop watching this fd
2012 if(!bytes || bytes < 0) {
bb4bdbaf 2013 t_fdm->removeReadFD(fd);
09e6702a
BH
2014 return;
2015 }
2016 }
// BYTE1: the first length byte is already in data[0], read the second one
667f7e60 2017 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 2018 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 2019 if(bytes==1) {
667f7e60 2020 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 2021 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 2022 conn->data.resize(conn->qlen);
667f7e60 2023 conn->bytesread=0;
09e6702a
BH
2024 }
2025 if(!bytes || bytes < 0) {
2026 if(g_logCommonErrors)
e6a9dde5 2027 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 2028 t_fdm->removeReadFD(fd);
09e6702a
BH
2029 return;
2030 }
2031 }
// GETQUESTION: read whatever is still missing of the qlen-byte question body
667f7e60 2032 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 2033 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
// the upper bound keeps the uint16_t cast applied to bytes below lossless
f9d67b41 2034 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
2035 if(g_logCommonErrors) {
2036 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
2037 }
bb4bdbaf 2038 t_fdm->removeReadFD(fd);
09e6702a
BH
2039 return;
2040 }
b841314c 2041 conn->bytesread+=(uint16_t)bytes;
// a partial read leaves the state untouched; the rest is picked up on the next call
667f7e60 2042 if(conn->bytesread==conn->qlen) {
// full question received: the next read on this connection starts a new length prefix
87ff2287 2043 conn->state = TCPConnection::BYTE0;
9a864da4 2044 std::unique_ptr<DNSComboWriter> dc;
09e6702a 2045 try {
9a864da4 2046 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 2047 }
// an unparseable packet is counted and dropped; the connection itself is left as is
16ce7f18 2048 catch(const MOADNSException &mde) {
3ddb9247 2049 g_stats.clientParseError++;
4957a608 2050 if(g_logCommonErrors)
e6a9dde5 2051 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 2052 return;
09e6702a 2053 }
cd989c87
BH
2054 dc->d_tcpConnection = conn; // carry the torch
2055 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 2056 dc->d_tcp=true;
// remote and source both start out as the TCP peer; d_source may be replaced below
// when XPF data is accepted from this client
5cc8371b
RG
2057 dc->setRemote(conn->d_remote);
2058 dc->setSource(conn->d_remote);
// the local address of the socket is used as both "local" and "destination"
a6147cd2 2059 ComboAddress dest;
d38e2ba9 2060 dest.reset();
a6147cd2 2061 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
2062 socklen_t len = dest.getSocklen();
2063 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
2064 dc->setLocal(dest);
5cc8371b 2065 dc->setDestination(dest);
33dcceba
RG
2066 DNSName qname;
2067 uint16_t qtype=0;
2068 uint16_t qclass=0;
2069 bool needECS = false;
// XPF is only looked for when the peer matches g_XPFAcl
5cc8371b 2070 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 2071 string requestorId;
590388d2 2072 string deviceId;
0a6a45c8 2073 string deviceName;
16bbc6e3 2074 bool logQuery = false;
aa7929a3 2075#ifdef HAVE_PROTOBUF
02b47f43 2076 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2077 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
2078 needECS = true;
2079 }
b773359c 2080 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
406b722e 2081 dc->d_logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2082#endif /* HAVE_PROTOBUF */
2083
2084#ifdef HAVE_FSTRM
2085 checkFrameStreamExport(luaconfsLocal);
33dcceba
RG
2086#endif
2087
// the query is only pre-parsed here when ECS, XPF or a Lua gettag hook needs it
70fb28d9 2088 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
2089
2090 try {
29e6303a 2091 EDNSOptionViewMap ednsOptions;
5cc8371b 2092 bool xpfFound = false;
b40562da 2093 dc->d_ecsParsed = true;
5cc8371b 2094 dc->d_ecsFound = false;
2749c3fe 2095 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
2096 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2097 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 2098
// gettag_ffi takes precedence over gettag when both hooks are defined
70fb28d9 2099 if(t_pdl) {
33dcceba 2100 try {
70fb28d9 2101 if (t_pdl->d_gettag_ffi) {
406b722e 2102 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_records, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName, dc->d_rcode, dc->d_ttlCap, dc->d_variable, logQuery, dc->d_logResponse, dc->d_followCNAMERecords);
70fb28d9
RG
2103 }
2104 else if (t_pdl->d_gettag) {
0a6a45c8 2105 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName);
70fb28d9 2106 }
33dcceba 2107 }
// a failing hook is only logged; processing of the query continues
70fb28d9 2108 catch(const std::exception& e) {
33dcceba 2109 if(g_logCommonErrors)
e6a9dde5 2110 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2111 }
2112 }
2113 }
// same for a failure while pre-parsing the packet
70fb28d9 2114 catch(const std::exception& e)
33dcceba
RG
2115 {
2116 if(g_logCommonErrors)
e6a9dde5 2117 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2118 }
2119 }
f52177c3
RG
2120
// raw view on the DNS header of the received question
2121 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2122
33dcceba 2123#ifdef HAVE_PROTOBUF
b773359c 2124 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 2125 dc->d_requestorId = requestorId;
590388d2 2126 dc->d_deviceId = deviceId;
0a6a45c8 2127 dc->d_deviceName = deviceName;
d61aa945 2128 dc->d_uuid = getUniqueID();
4898a348 2129 }
02b47f43 2130
b773359c 2131 if(t_protobufServers) {
02b47f43 2132 try {
02b47f43 2133
// with taggedOnly set, queries without policy tags are not logged
845cbf4c 2134 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
0a6a45c8 2135 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId, dc->d_deviceName);
b790ef3d 2136 }
02b47f43
RG
2137 }
2138 catch(std::exception& e) {
2139 if(g_logCommonErrors)
e6a9dde5 2140 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
2141 }
2142 }
aa7929a3 2143#endif
// Lua ipfilter hook: a true return value drops the question without an answer
5034517a
RG
2144 if(t_pdl) {
2145 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2146 if(!g_quiet)
2147 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2148 g_stats.policyDrops++;
2149 return;
2150 }
2151 }
2152
// header sanity checks: responses (QR set), non-zero opcodes and questions with
// qdcount == 0 are counted and ignored
879b3f70 2153 if(dc->d_mdp.d_header.qr) {
048f5db6 2154 g_stats.ignoredCount++;
c0f9be19
RG
2155 if(g_logCommonErrors) {
2156 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2157 }
4957a608 2158 return;
879b3f70 2159 }
3abcdab2 2160 if(dc->d_mdp.d_header.opcode) {
048f5db6 2161 g_stats.ignoredCount++;
c0f9be19
RG
2162 if(g_logCommonErrors) {
2163 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2164 }
c0f9be19
RG
2165 return;
2166 }
2167 else if (dh->qdcount == 0) {
2168 g_stats.emptyQueriesCount++;
2169 if(g_logCommonErrors) {
2170 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2171 }
3abcdab2
PD
2172 return;
2173 }
// accepted question: update the counters and the per-connection in-flight count
09e6702a 2174 else {
4957a608
BH
2175 ++g_stats.qcounter;
2176 ++g_stats.tcpqcounter;
87ff2287 2177 ++conn->d_requestsInFlight;
// once the in-flight limit is reached the fd is no longer watched for reads;
// otherwise its read timeout is refreshed
c51c551e 2178 if (conn->d_requestsInFlight >= TCPConnection::s_maxInFlight) {
87ff2287
OM
2179 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
2180 } else {
2181 Utility::gettimeofday(&g_now, 0); // needed?
2182 struct timeval ttd = g_now;
2183 t_fdm->setReadTTD(fd, ttd, g_tcpTimeout);
2184 }
// ownership of dc passes to the new mthread
d5c6ec95 2185 MT->makeThread(startDoResolve, dc.release()); // deletes dc
4957a608 2186 return;
09e6702a
BH
2187 }
2188 }
2189 }
2190}
2191
6dcd28c3 2192//! Handle new incoming TCP connection
d187038c 2193static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2194{
37d3f960 2195 ComboAddress addr;
09e6702a 2196 socklen_t addrlen=sizeof(addr);
a683e8bd 2197 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2198 if(newsock>=0) {
85c32340
BH
2199 if(MT->numProcesses() > g_maxMThreads) {
2200 g_stats.overCapacityDrops++;
a7b68ae7
RG
2201 try {
2202 closesocket(newsock);
2203 }
2204 catch(const PDNSException& e) {
e6a9dde5 2205 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2206 }
85c32340
BH
2207 return;
2208 }
2209
92011b8f 2210 if(t_remotes)
2211 t_remotes->push_back(addr);
49a699c4 2212 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 2213 if(!g_quiet)
e6a9dde5 2214 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2215
09e6702a 2216 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2217 try {
2218 closesocket(newsock);
2219 }
2220 catch(const PDNSException& e) {
e6a9dde5 2221 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2222 }
09e6702a
BH
2223 return;
2224 }
bd0289fc 2225 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2226 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2227 try {
2228 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2229 }
2230 catch(const PDNSException& e) {
e6a9dde5 2231 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2232 }
09e6702a
BH
2233 return;
2234 }
3ddb9247 2235
3897b9e1 2236 setNonBlocking(newsock);
f26bf547 2237 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 2238 tc->state=TCPConnection::BYTE0;
3ddb9247 2239
27ae2e3c
RG
2240 struct timeval ttd;
2241 Utility::gettimeofday(&ttd, 0);
2242 ttd.tv_sec += g_tcpTimeout;
c038218b 2243
27ae2e3c 2244 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
09e6702a
BH
2245 }
2246}
3ddb9247 2247
d187038c 2248static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 2249{
183eb877 2250 gettimeofday(&g_now, 0);
c29d820c
RG
2251 if (tv.tv_sec) {
2252 struct timeval diff = g_now - tv;
2253 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2254
c29d820c
RG
2255 if(delta > 1000.0) {
2256 g_stats.tooOldDrops++;
2257 return nullptr;
2258 }
b71b60ee 2259 }
2260
1bc3c142 2261 ++g_stats.qcounter;
d7f10541
BH
2262 if(fromaddr.sin4.sin_family==AF_INET6)
2263 g_stats.ipv6qcounter++;
1bc3c142
BH
2264
2265 string response;
93f0da94 2266 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2267 unsigned int ctag=0;
f57486f1 2268 uint32_t qhash = 0;
12aff2e5 2269 bool needECS = false;
5cc8371b 2270 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 2271 std::vector<std::string> policyTags;
5fd2577f 2272 LuaContext::LuaObject data;
5cc8371b
RG
2273 ComboAddress source = fromaddr;
2274 ComboAddress destination = destaddr;
67e31ebe 2275 string requestorId;
590388d2 2276 string deviceId;
0a6a45c8 2277 string deviceName;
16bbc6e3 2278 bool logQuery = false;
406b722e 2279 bool logResponse = false;
12aff2e5 2280#ifdef HAVE_PROTOBUF
02b47f43 2281 boost::uuids::uuid uniqueId;
02b47f43 2282 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2283 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2284 uniqueId = getUniqueID();
02b47f43 2285 needECS = true;
63341e8d 2286 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2287 uniqueId = getUniqueID();
02b47f43 2288 }
b773359c 2289 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
406b722e 2290 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2291#endif
2292#ifdef HAVE_FSTRM
2293 checkFrameStreamExport(luaconfsLocal);
12aff2e5 2294#endif
b40562da
RG
2295 EDNSSubnetOpts ednssubnet;
2296 bool ecsFound = false;
2297 bool ecsParsed = false;
08b02366
RG
2298 uint16_t ecsBegin = 0;
2299 uint16_t ecsEnd = 0;
37a919d4
RG
2300 std::vector<DNSRecord> records;
2301 boost::optional<int> rcode = boost::none;
70fb28d9
RG
2302 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2303 bool variable = false;
37a919d4 2304 bool followCNAMEs = false;
1bc3c142 2305 try {
02b47f43
RG
2306 DNSName qname;
2307 uint16_t qtype=0;
2308 uint16_t qclass=0;
1bc3c142 2309 uint32_t age;
c15ff3df 2310 bool qnameParsed=false;
8f7473d7 2311#ifdef MALLOC_TRACE
2312 /*
2313 static uint64_t last=0;
2314 if(!last)
2315 g_mtracer->clearAllocators();
2316 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2317 last=g_mtracer->getAllocs();
2318 cout<<g_mtracer->topAllocatorsString()<<endl;
2319 g_mtracer->clearAllocators();
2320 */
2321#endif
55a1378f 2322
70fb28d9 2323 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2324 try {
29e6303a 2325 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2326 bool xpfFound = false;
2327
2328 ecsFound = false;
2329
2330 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2331 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2332 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2333
c15ff3df
RG
2334 qnameParsed = true;
2335 ecsParsed = true;
12aff2e5 2336
70fb28d9 2337 if(t_pdl) {
12aff2e5 2338 try {
70fb28d9 2339 if (t_pdl->d_gettag_ffi) {
406b722e 2340 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, records, data, ednsOptions, false, requestorId, deviceId, deviceName, rcode, ttlCap, variable, logQuery, logResponse, followCNAMEs);
70fb28d9
RG
2341 }
2342 else if (t_pdl->d_gettag) {
0a6a45c8 2343 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName);
70fb28d9 2344 }
12aff2e5 2345 }
70fb28d9 2346 catch(const std::exception& e) {
12aff2e5 2347 if(g_logCommonErrors)
e6a9dde5 2348 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2349 }
8ea8c302 2350 }
b2eacd67 2351 }
70fb28d9 2352 catch(const std::exception& e)
b2eacd67 2353 {
2354 if(g_logCommonErrors)
e6a9dde5 2355 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2356 }
12ce523e 2357 }
3ddb9247 2358
02b47f43 2359 bool cacheHit = false;
1fbc6dc5 2360 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2361#ifdef HAVE_PROTOBUF
b773359c 2362 if (t_protobufServers) {
d362f7c1 2363 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2364 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2365 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
0a6a45c8 2366 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName);
b790ef3d 2367 }
d9d3f9c1
RG
2368 }
2369#endif /* HAVE_PROTOBUF */
02b47f43 2370
70fb28d9
RG
2371 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2372 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2373 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2374 vState valState;
c15ff3df 2375 if (qnameParsed) {
08b02366 2376 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2377 }
2378 else {
08b02366 2379 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2380 }
2381
d9d3f9c1 2382 if (cacheHit) {
8467ec26
KM
2383 if(valState == Bogus) {
2384 if(t_bogusremotes)
2385 t_bogusremotes->push_back(source);
2386 if(t_bogusqueryring)
2387 t_bogusqueryring->push_back(make_pair(qname, qtype));
2388 }
2389
d9d3f9c1 2390#ifdef HAVE_PROTOBUF
b773359c 2391 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2392 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
5d2e9a83
RG
2393 ComboAddress requestor = requestorNM.getMaskedNetwork();
2394 requestor.setPort(source.getPort());
d362f7c1 2395 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
d14121a8 2396 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
c29d820c
RG
2397 if (g_useKernelTimestamp && tv.tv_sec) {
2398 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2399 }
2400 else {
2401 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2402 }
d362f7c1
RG
2403 pbMessage->setRequestorId(requestorId);
2404 pbMessage->setDeviceId(deviceId);
0a6a45c8 2405 pbMessage->setDeviceName(deviceName);
b773359c 2406 protobufLogResponse(*pbMessage);
02b47f43 2407 }
d9d3f9c1 2408#endif /* HAVE_PROTOBUF */
49a3500d 2409 if(!g_quiet)
e6a9dde5 2410 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2411
1bc3c142
BH
2412 g_stats.packetCacheHits++;
2413 SyncRes::s_queries++;
2414 ageDNSPacket(response, age);
b71b60ee 2415 struct msghdr msgh;
2416 struct iovec iov;
7bec330a
OM
2417 cmsgbuf_aligned cbuf;
2418 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2419 msgh.msg_control=NULL;
2420
cbc03320 2421 if(g_fromtosockets.count(fd)) {
7bec330a 2422 addCMsgSrcAddr(&msgh, &cbuf, &destaddr, 0);
b71b60ee 2423 }
a2a81d42
OM
2424 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors) {
2425 int err = errno;
2426 g_log << Logger::Warning << "Sending UDP reply to client " << source.toStringWithPort()
2427 << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " failed with: "
2428 << strerror(err) << endl;
2429 }
97bee66d 2430 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2431 struct dnsheader tmpdh;
2432 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2433 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2434 }
08f3f638 2435 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2436 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2437 return 0;
2438 }
3ddb9247 2439 }
1bc3c142 2440 catch(std::exception& e) {
4b4566e8
RG
2441 if(g_logCommonErrors)
2442 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2443 return 0;
2444 }
3ddb9247 2445
f26bf547 2446 if(t_pdl) {
5cc8371b 2447 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2448 if(!g_quiet)
e6a9dde5 2449 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2450 g_stats.policyDrops++;
2451 return 0;
2452 }
2453 }
2454
1bc3c142 2455 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2456 if(!g_quiet)
e6a9dde5 2457 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2458
1bc3c142
BH
2459 g_stats.overCapacityDrops++;
2460 return 0;
2461 }
3ddb9247 2462
37a919d4 2463 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data), std::move(records)));
1bc3c142 2464 dc->setSocket(fd);
49a3500d 2465 dc->d_tag=ctag;
e9f63d47 2466 dc->d_qhash=qhash;
5cc8371b
RG
2467 dc->setRemote(fromaddr);
2468 dc->setSource(source);
b71b60ee 2469 dc->setLocal(destaddr);
5cc8371b 2470 dc->setDestination(destination);
1bc3c142 2471 dc->d_tcp=false;
b40562da
RG
2472 dc->d_ecsFound = ecsFound;
2473 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2474 dc->d_ecsBegin = ecsBegin;
2475 dc->d_ecsEnd = ecsEnd;
b40562da 2476 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2477 dc->d_ttlCap = ttlCap;
2478 dc->d_variable = variable;
37a919d4
RG
2479 dc->d_followCNAMERecords = followCNAMEs;
2480 dc->d_rcode = rcode;
406b722e 2481 dc->d_logResponse = logResponse;
aa7929a3 2482#ifdef HAVE_PROTOBUF
b773359c 2483 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2484 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2485 }
67e31ebe 2486 dc->d_requestorId = requestorId;
590388d2 2487 dc->d_deviceId = deviceId;
0a6a45c8 2488 dc->d_deviceName = deviceName;
c29d820c 2489 dc->d_kernelTimestamp = tv;
aa7929a3
RG
2490#endif
2491
9a864da4 2492 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2493 return 0;
3ddb9247
PD
2494}
2495
b71b60ee 2496
d187038c 2497static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2498{
a683e8bd 2499 ssize_t len;
12c2f2b9 2500 static const size_t maxIncomingQuerySize = 512;
04896b99 2501 static thread_local std::string data;
5db529f8 2502 ComboAddress fromaddr;
b71b60ee 2503 struct msghdr msgh;
2504 struct iovec iov;
7bec330a 2505 cmsgbuf_aligned cbuf;
390f1dab 2506 bool firstQuery = true;
b71b60ee 2507
c0a00acd
RG
2508 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2509 data.resize(maxIncomingQuerySize);
2510 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
7bec330a 2511 fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2512
c0a00acd 2513 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2514
c0a00acd 2515 firstQuery = false;
390f1dab 2516
c0a00acd
RG
2517 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2518 g_stats.ignoredCount++;
2519 if (!g_quiet) {
2520 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2521 }
2522 return;
04896b99 2523 }
04896b99 2524
c0a00acd
RG
2525 if (msgh.msg_flags & MSG_TRUNC) {
2526 g_stats.truncatedDrops++;
2527 if (!g_quiet) {
2528 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2529 }
2530 return;
ba892c7f 2531 }
b23b8614 2532
c0a00acd
RG
2533 if(t_remotes) {
2534 t_remotes->push_back(fromaddr);
2535 }
81859ba5 2536
c0a00acd
RG
2537 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2538 if(!g_quiet) {
2539 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2540 }
3ddb9247 2541
c0a00acd
RG
2542 g_stats.unauthorizedUDP++;
2543 return;
5db529f8 2544 }
c0a00acd
RG
2545 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2546 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2547 if(!g_quiet) {
2548 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2549 }
2550
2551 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2552 return;
3abcdab2 2553 }
c0a00acd
RG
2554
2555 try {
2556 data.resize(static_cast<size_t>(len));
2557 dnsheader* dh=(dnsheader*)&data[0];
2558
2559 if(dh->qr) {
2560 g_stats.ignoredCount++;
2561 if(g_logCommonErrors) {
2562 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2563 }
2564 }
2565 else if(dh->opcode) {
2566 g_stats.ignoredCount++;
2567 if(g_logCommonErrors) {
2568 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2569 }
a6147cd2 2570 }
c0f9be19
RG
2571 else if (dh->qdcount == 0) {
2572 g_stats.emptyQueriesCount++;
2573 if(g_logCommonErrors) {
2574 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2575 }
2576 }
a6147cd2 2577 else {
c0a00acd
RG
2578 struct timeval tv={0,0};
2579 HarvestTimestamp(&msgh, &tv);
2580 ComboAddress dest;
2581 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2582 auto loc = rplookup(g_listenSocketsAddresses, fd);
2583 if(HarvestDestinationAddress(&msgh, &dest)) {
2584 // but.. need to get port too
2585 if(loc) {
2586 dest.sin4.sin_port = loc->sin4.sin_port;
2587 }
a6147cd2 2588 }
2589 else {
c0a00acd
RG
2590 if(loc) {
2591 dest = *loc;
2592 }
2593 else {
2594 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2595 socklen_t slen = dest.getSocklen();
2596 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2597 }
2598 }
2599
2600 if(g_weDistributeQueries) {
2601 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2602 }
2603 else {
144040be 2604 ++s_threadInfos[t_id].numberOfDistributedQueries;
c0a00acd 2605 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2606 }
2607 }
c0a00acd 2608 }
16ce7f18 2609 catch(const MOADNSException &mde) {
c0a00acd
RG
2610 g_stats.clientParseError++;
2611 if(g_logCommonErrors) {
2612 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2613 }
2614 }
2615 catch(const std::runtime_error& e) {
2616 g_stats.clientParseError++;
2617 if(g_logCommonErrors) {
2618 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2619 }
5db529f8
BH
2620 }
2621 }
c0a00acd
RG
2622 else {
2623 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2624 if(firstQuery && errno == EAGAIN) {
2625 g_stats.noPacketError++;
2626 }
390f1dab 2627
c0a00acd
RG
2628 break;
2629 }
ac0e821b 2630 }
5db529f8
BH
2631}
2632
adb6cd72 2633static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2634{
37d3f960 2635 int fd;
f28307ad 2636 vector<string>locals;
2e3d8a19 2637 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2638
f28307ad 2639 if(locals.empty())
3f81d239 2640 throw PDNSException("No local address specified");
3ddb9247 2641
f28307ad 2642 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2643 ServiceTuple st;
2644 st.port=::arg().asNum("local-port");
2645 parseService(*i, st);
3ddb9247 2646
32252594
BH
2647 ComboAddress sin;
2648
d38e2ba9 2649 sin.reset();
37d3f960 2650 sin.sin4.sin_family = AF_INET;
32252594 2651 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2652 sin.sin6.sin6_family = AF_INET6;
f71bc087 2653 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2654 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2655 }
2656
2657 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2658 if(fd<0)
3f81d239 2659 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2660
3897b9e1 2661 setCloseOnExec(fd);
a903b39c 2662
f28307ad 2663 int tmp=1;
810ff705 2664 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2665 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2666 exit(1);
f28307ad 2667 }
0dfa94ab 2668 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
a2a81d42
OM
2669 int err = errno;
2670 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
0dfa94ab 2671 }
2672
c8ddb7c2 2673#ifdef TCP_DEFER_ACCEPT
38ac0821 2674 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2675 if(i==locals.begin())
377602e3 2676 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2677 }
2678#endif
2679
fec7dd5a
SS
2680 if( ::arg().mustDo("non-local-bind") )
2681 Utility::setBindAny(AF_INET, fd);
2682
2332f42d 2683#ifdef SO_REUSEPORT
810ff705
RG
2684 if(g_reusePort) {
2685 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2686 throw PDNSException("SO_REUSEPORT: "+stringerror());
2687 }
2688#endif
2689
0735b17e
RG
2690 if (::arg().asNum("tcp-fast-open") > 0) {
2691#ifdef TCP_FASTOPEN
2692 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2693 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
a2a81d42
OM
2694 int err = errno;
2695 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(err)<<endl;
0735b17e
RG
2696 }
2697#else
e6a9dde5 2698 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2699#endif
2700 }
2701
32252594 2702 sin.sin4.sin_port = htons(st.port);
a683e8bd 2703 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2704 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2705 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2706
3897b9e1 2707 setNonBlocking(fd);
49a699c4 2708 setSocketSendBuffer(fd, 65000);
37d3f960 2709 listen(fd, 128);
b243ca3b 2710 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2711 tcpSockets.insert(fd);
2712
84433b79 2713 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2714 // - fd is not that which we know here, but returned from accept()
3ddb9247 2715 if(sin.sin4.sin_family == AF_INET)
377602e3 2716 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2717 else
377602e3 2718 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2719 }
9c495589
BH
2720}
2721
b243ca3b 2722static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2723{
fec7dd5a 2724 int one=1;
f28307ad 2725 vector<string>locals;
2e3d8a19 2726 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2727
f28307ad 2728 if(locals.empty())
3f81d239 2729 throw PDNSException("No local address specified");
3ddb9247 2730
f28307ad 2731 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2732 ServiceTuple st;
2733 st.port=::arg().asNum("local-port");
2734 parseService(*i, st);
2735
37d3f960 2736 ComboAddress sin;
996c89cc 2737
d38e2ba9 2738 sin.reset();
37d3f960 2739 sin.sin4.sin_family = AF_INET;
32252594 2740 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2741 sin.sin6.sin6_family = AF_INET6;
f71bc087 2742 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2743 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2744 }
3ddb9247 2745
bb4bdbaf 2746 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2747 if(fd < 0) {
a2a81d42 2748 throw PDNSException("Making a UDP server socket for resolver: "+stringerror());
d3b4137e 2749 }
915b0c39 2750 if (!setSocketTimestamps(fd))
e6a9dde5 2751 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2752
b71b60ee 2753 if(IsAnyAddress(sin)) {
cbc03320 2754 if(sin.sin4.sin_family == AF_INET)
2755 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2756 g_fromtosockets.insert(fd);
757d3179 2757#ifdef IPV6_RECVPKTINFO
cbc03320 2758 if(sin.sin4.sin_family == AF_INET6)
2759 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2760 g_fromtosockets.insert(fd);
757d3179 2761#endif
0dfa94ab 2762 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
a2a81d42
OM
2763 int err = errno;
2764 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
0dfa94ab 2765 }
b71b60ee 2766 }
fec7dd5a
SS
2767 if( ::arg().mustDo("non-local-bind") )
2768 Utility::setBindAny(AF_INET6, fd);
2769
3897b9e1 2770 setCloseOnExec(fd);
a903b39c 2771
4e9a20e6 2772 setSocketReceiveBuffer(fd, 250000);
32252594 2773 sin.sin4.sin_port = htons(st.port);
37d3f960 2774
2332f42d 2775
2573d4a6 2776#ifdef SO_REUSEPORT
810ff705 2777 if(g_reusePort) {
2332f42d 2778 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2779 throw PDNSException("SO_REUSEPORT: "+stringerror());
2780 }
2781#endif
90f9fbc0
RG
2782
2783 if (sin.isIPv4()) {
2784 try {
2785 setSocketIgnorePMTU(fd);
2786 }
2787 catch(const std::exception& e) {
2788 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2789 }
2790 }
2791
2792 socklen_t socklen=sin.getSocklen();
3ddb9247 2793 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2794 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2795
3897b9e1 2796 setNonBlocking(fd);
c2136bf0 2797
b243ca3b 2798 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2799 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2800 if(sin.sin4.sin_family == AF_INET)
377602e3 2801 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2802 else
377602e3 2803 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2804 }
c836dc19 2805}
caa6eefa 2806
d187038c 2807static void daemonize(void)
c836dc19
BH
2808{
2809 if(fork())
2810 exit(0); // bye bye
3ddb9247
PD
2811
2812 setsid();
c836dc19 2813
27a5ead5 2814 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2815 if(i < 0)
e6a9dde5 2816 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2817 else {
2818 dup2(i,0); /* stdin */
2819 dup2(i,1); /* stderr */
2820 dup2(i,2); /* stderr */
2821 close(i);
2822 }
288f4aa9 2823}
caa6eefa 2824
9f374187
FL
2825static void termIntHandler(int)
2826{
cd180a71 2827 doExit();
9f374187
FL
2828}
2829
d187038c 2830static void usr1Handler(int)
c75a6a9e
BH
2831{
2832 statsWanted=true;
2833}
ae1b2e98 2834
d187038c 2835static void usr2Handler(int)
9170fbaf 2836{
f1f34cc2 2837 g_quiet= !g_quiet;
2838 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2839 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2840}
2841
d187038c 2842static void doStats(void)
c75a6a9e 2843{
16beeaa4
BH
2844 static time_t lastOutputTime;
2845 static uint64_t lastQueryCount;
d299d4f5 2846
cdde2458
OM
2847 uint64_t cacheHits = s_RC->cacheHits;
2848 uint64_t cacheMisses = s_RC->cacheMisses;
2849 uint64_t cacheSize = s_RC->size();
7ce9aad6
OM
2850 auto rc_stats = s_RC->stats();
2851 double r = rc_stats.second == 0 ? 0.0 : (100.0 * rc_stats.first / rc_stats.second);
2852
d299d4f5 2853 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2854 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
cdde2458 2855 cacheSize << " cache entries, "<<
3427fa8a 2856 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247 2857 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
7ce9aad6 2858 g_log << Logger::Notice<< "stats: cache contended/acquired " << rc_stats.first << '/' << rc_stats.second << " = " << r << '%' << endl;
3ddb9247 2859
e6a9dde5 2860 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2861 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
60e5208a 2862 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<", failed ns: "
bbc7101c
OM
2863 << broadcastAccFunction<uint64_t>(pleaseGetFailedServersSize)<<", ednsmap: "
2864 <<broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize)<<endl;
e6a9dde5
PL
2865 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2866 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2867 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2868 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2869 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2870
e6a9dde5 2871 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2872 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2873
e6a9dde5 2874 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2875 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2876
144040be
RG
2877 size_t idx = 0;
2878 for (const auto& threadInfo : s_threadInfos) {
2879 if(threadInfo.isWorker) {
ad9fc3dc 2880 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
144040be
RG
2881 ++idx;
2882 }
2883 }
2884
16beeaa4
BH
2885 time_t now = time(0);
2886 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2887 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2888 }
2889 lastOutputTime = now;
2890 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2891 }
3ddb9247 2892 else if(statsWanted)
e6a9dde5 2893 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2894
c75a6a9e
BH
2895 statsWanted=false;
2896}
c836dc19 2897
29f0b1ce 2898static void houseKeeping(void *)
c836dc19 2899{
cdde2458
OM
2900 static thread_local time_t last_rootupdate, last_secpoll, last_trustAnchorUpdate{0}, last_RC_prune;
2901 static thread_local struct timeval last_prune;
2902
3337c2f7
RG
2903 static thread_local int cleanCounter=0;
2904 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
2905 auto luaconfsLocal = g_luaconfs.getLocal();
2906
2907 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2908 // Loading the Lua config file already "refreshed" the TAs
2909 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2910 }
2911
cc59bce6 2912 try {
6b0d90ea 2913 if(s_running) {
cc59bce6 2914 return;
6b0d90ea 2915 }
cc59bce6 2916 s_running=true;
3ddb9247 2917
b9715061
OM
2918 struct timeval now, past;
2919 Utility::gettimeofday(&now, nullptr);
2920 past = now;
2921 past.tv_sec -= 5;
2922 if (last_prune < past) {
a6f7f5fe 2923 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
a6f7f5fe 2924 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2925
b9715061 2926 time_t limit;
cc59bce6 2927 if(!((cleanCounter++)%40)) { // this is a full scan!
b9715061 2928 limit=now.tv_sec-300;
a712cb56 2929 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2930 }
b9715061
OM
2931 limit = now.tv_sec - SyncRes::s_serverdownthrottletime * 10;
2932 SyncRes::pruneFailedServers(limit);
2933 limit = now.tv_sec - 2*3600;
2934 SyncRes::pruneEDNSStatuses(limit);
2935 SyncRes::pruneThrottledServers();
2936 Utility::gettimeofday(&last_prune, nullptr);
d67620e4 2937 }
3ddb9247 2938
b243ca3b 2939 if(isHandlerThread()) {
cdde2458
OM
2940 if (now.tv_sec - last_RC_prune > 5) {
2941 s_RC->doPrune(g_maxCacheEntries);
2942 last_RC_prune = now.tv_sec;
2943 }
2944 // XXX !!! global
2945 if(now.tv_sec - last_rootupdate > 7200) {
2946 int res = SyncRes::getRootNS(g_now, nullptr);
2947 if (!res) {
2948 last_rootupdate=now.tv_sec;
2949 primeRootNSZones(g_dnssecmode != DNSSECMode::Off);
2950 }
2951 }
3ddb9247 2952
cc59bce6 2953 if(now.tv_sec - last_secpoll >= 3600) {
2954 try {
2955 doSecPoll(&last_secpoll);
2956 }
124dd1d4 2957 catch(const std::exception& e)
581d4ea3 2958 {
e6a9dde5 2959 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2960 }
124dd1d4 2961 catch(const PDNSException& e)
47e9b74f 2962 {
e6a9dde5 2963 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2964 }
124dd1d4 2965 catch(const ImmediateServFailException &e)
d0992a65 2966 {
e6a9dde5 2967 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2968 }
124dd1d4
RG
2969 catch(const PolicyHitException& e) {
2970 g_log<<Logger::Error<<"Policy hit while performing security poll"<<endl;
2971 }
47e9b74f 2972 catch(...)
2973 {
e6a9dde5 2974 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2975 }
18b73338 2976 }
e4ae55e5
PL
2977
2978 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2979 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2980 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2981 try {
2982 map<DNSName, dsmap_t> dsAnchors;
2983 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2984 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2985 lci.dsAnchors = dsAnchors;
2986 });
2987 }
2988 last_trustAnchorUpdate = now.tv_sec;
2989 } catch (const PDNSException &pe) {
2990 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2991 }
2992 }
d67620e4 2993 }
6b0d90ea 2994 s_running=false;
d67620e4 2995 }
cc59bce6 2996 catch(PDNSException& ae)
2997 {
2998 s_running=false;
e6a9dde5 2999 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 3000 throw;
3001 }
779828c4 3002}
d6d5dea7 3003
d187038c 3004static void makeThreadPipes()
49a699c4 3005{
ee271fc4
RG
3006 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
3007 if (pipeBufferSize > 0) {
3008 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
3009 }
3010
b243ca3b
RG
3011 /* thread 0 is the handler / SNMP, we start at 1 */
3012 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
3013 auto& threadInfos = s_threadInfos.at(n);
3014
49a699c4
BH
3015 int fd[2];
3016 if(pipe(fd) < 0)
3017 unixDie("Creating pipe for inter-thread communications");
3ddb9247 3018
b243ca3b
RG
3019 threadInfos.pipes.readToThread = fd[0];
3020 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 3021
49a699c4
BH
3022 if(pipe(fd) < 0)
3023 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
3024
3025 threadInfos.pipes.readFromThread = fd[0];
3026 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 3027
cf8cda18
RG
3028 if(pipe(fd) < 0)
3029 unixDie("Creating pipe for inter-thread communications");
d10307c5 3030
b243ca3b
RG
3031 threadInfos.pipes.readQueriesToThread = fd[0];
3032 threadInfos.pipes.writeQueriesToThread = fd[1];
3033
ee271fc4
RG
3034 if (pipeBufferSize > 0) {
3035 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
a2a81d42
OM
3036 int err = errno;
3037 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(err)<<endl;
ee271fc4
RG
3038 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
3039 if (existingSize > 0) {
3040 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
3041 }
3042 }
3043 }
3044
b243ca3b 3045 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
3046 unixDie("Making pipe for inter-thread communications non-blocking");
3047 }
49a699c4
BH
3048 }
3049}
3050
00c9b8c1
BH
3051struct ThreadMSG
3052{
3053 pipefunc_t func;
3054 bool wantAnswer;
3055};
3056
b4e76a18 3057void broadcastFunction(const pipefunc_t& func)
49a699c4 3058{
b243ca3b
RG
3059 /* This function might be called by the worker with t_id 0 during startup
3060 for the initialization of ACLs and domain maps. After that it should only
3061 be called by the handler. */
d77abca1 3062
b243ca3b
RG
3063 if (s_threadInfos.empty() && isHandlerThread()) {
3064 /* the handler and distributors will call themselves below, but
3065 during startup we get called while s_threadInfos has not been
3066 populated yet to update the ACL or domain maps, so we need to
3067 handle that case.
3068 */
3069 func();
3070 }
b4e76a18 3071
b243ca3b
RG
3072 unsigned int n = 0;
3073 for (const auto& threadInfo : s_threadInfos) {
49a699c4 3074 if(n++ == t_id) {
b4e76a18 3075 func(); // don't write to ourselves!
49a699c4
BH
3076 continue;
3077 }
3ddb9247 3078
00c9b8c1
BH
3079 ThreadMSG* tmsg = new ThreadMSG();
3080 tmsg->func = func;
3081 tmsg->wantAnswer = true;
b243ca3b 3082 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 3083 delete tmsg;
b243ca3b 3084
49a699c4 3085 unixDie("write to thread pipe returned wrong size or error");
b841314c 3086 }
3ddb9247 3087
49467864 3088 string* resp = nullptr;
b243ca3b 3089 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 3090 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3091
49a699c4 3092 if(resp) {
49a699c4 3093 delete resp;
49467864 3094 resp = nullptr;
49a699c4
BH
3095 }
3096 }
3097}
06ea9015 3098
592d7ade 3099static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 3100{
144040be 3101 auto& targetInfo = s_threadInfos[target];
b243ca3b
RG
3102 if(!targetInfo.isWorker) {
3103 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 3104 exit(1);
00c9b8c1 3105 }
d77abca1 3106
b243ca3b 3107 const auto& tps = targetInfo.pipes;
3ddb9247 3108
cf8cda18
RG
3109 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
3110 if (written > 0) {
3111 if (static_cast<size_t>(written) != sizeof(tmsg)) {
3112 delete tmsg;
3113 unixDie("write to thread pipe returned wrong size or error");
3114 }
3115 }
3116 else {
3117 int error = errno;
cf8cda18 3118 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 3119 return false;
cf8cda18 3120 } else {
592d7ade 3121 delete tmsg;
17634427 3122 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 3123 }
b841314c 3124 }
592d7ade 3125
144040be
RG
3126 ++targetInfo.numberOfDistributedQueries;
3127
592d7ade
RG
3128 return true;
3129}
3130
144040be
RG
3131static unsigned int getWorkerLoad(size_t workerIdx)
3132{
3133 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
3134 if (mt != nullptr) {
3135 return mt->numProcesses();
3136 }
3137 return 0;
3138}
3139
3140static unsigned int selectWorker(unsigned int hash)
3141{
3142 if (s_balancingFactor == 0) {
3143 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
3144 }
3145
3146 /* we start with one, representing the query we are currently handling */
3147 double currentLoad = 1;
3148 std::vector<unsigned int> load(g_numWorkerThreads);
3149 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3150 load[idx] = getWorkerLoad(idx);
3151 currentLoad += load[idx];
3152 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3153 }
3154
3155 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3156 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3157
3158 unsigned int worker = hash % g_numWorkerThreads;
1b9d2d46 3159 /* at least one server has to be at or below the average load */
596bf482
RG
3160 if (load[worker] > targetLoad) {
3161 ++g_stats.rebalancedQueries;
3162 do {
3163 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3164 worker = (worker + 1) % g_numWorkerThreads;
3165 }
3166 while(load[worker] > targetLoad);
144040be
RG
3167 }
3168
3169 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3170}
3171
592d7ade
RG
3172// This function is only called by the distributor threads, when pdns-distributes-queries is set
3173void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3174{
3175 if (!isDistributorThread()) {
3176 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3177 exit(1);
3178 }
3179
3180 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
144040be 3181 unsigned int target = selectWorker(hash);
592d7ade
RG
3182
3183 ThreadMSG* tmsg = new ThreadMSG();
3184 tmsg->func = func;
3185 tmsg->wantAnswer = false;
3186
3187 if (!trySendingQueryToWorker(target, tmsg)) {
3188 /* if this function failed but did not raise an exception, it means that the pipe
3189 was full, let's try another one */
3190 unsigned int newTarget = 0;
3191 do {
3192 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3193 } while (newTarget == target);
3194
3195 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3196 g_stats.queryPipeFullDrops++;
3197 delete tmsg;
3198 }
3199 }
00c9b8c1 3200}
3427fa8a 3201
d187038c 3202static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 3203{
f26bf547 3204 ThreadMSG* tmsg = nullptr;
3ddb9247 3205
cf8cda18 3206 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
3207 unixDie("read from thread pipe returned wrong size or error");
3208 }
3ddb9247 3209
2f22827a 3210 void *resp=0;
3211 try {
3212 resp = tmsg->func();
3213 }
3214 catch(std::exception& e) {
6d2010a8 3215 if(g_logCommonErrors)
e6a9dde5 3216 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3217 }
3218 catch(PDNSException& e) {
6d2010a8 3219 if(g_logCommonErrors)
e6a9dde5 3220 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3221 }
d7c676a5 3222 if(tmsg->wantAnswer) {
b243ca3b
RG
3223 const auto& threadInfo = s_threadInfos.at(t_id);
3224 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 3225 delete tmsg;
00c9b8c1 3226 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
3227 }
3228 }
3ddb9247 3229
00c9b8c1 3230 delete tmsg;
49a699c4 3231}
09e6702a 3232
13034931
BH
3233template<class T> void *voider(const boost::function<T*()>& func)
3234{
3235 return func();
3236}
3237
b3b5459d
BH
3238vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3239{
3240 a.insert(a.end(), b.begin(), b.end());
3241 return a;
3242}
3243
/* Appends every element of b to the end of a and returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  a.insert(a.cend(), std::begin(b), std::end(b));
  return a;
}
3249
3ddb9247
PD
3250vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3251{
3252 a.insert(a.end(), b.begin(), b.end());
3253 return a;
3254}
3255
92011b8f 3256
387b9ca6
RG
3257/*
3258 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
3259 reload the Lua script (not the Lua config) or change the current trace regex,
3260 and by the SNMP thread to gather metrics. */
b4e76a18 3261template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 3262{
b243ca3b 3263 if (!isHandlerThread()) {
788eeb4c 3264 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 3265 exit(1);
d77abca1
RG
3266 }
3267
b243ca3b 3268 unsigned int n = 0;
3427fa8a 3269 T ret=T();
b243ca3b
RG
3270 for (const auto& threadInfo : s_threadInfos) {
3271 if (n++ == t_id) {
3272 continue;
3273 }
3274
3275 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
3276 ThreadMSG* tmsg = new ThreadMSG();
3277 tmsg->func = boost::bind(voider<T>, func);
3278 tmsg->wantAnswer = true;
3ddb9247 3279
b841314c
RG
3280 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3281 delete tmsg;
3427fa8a 3282 unixDie("write to thread pipe returned wrong size or error");
b841314c 3283 }
3ddb9247 3284
49467864 3285 T* resp = nullptr;
3427fa8a
BH
3286 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3287 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3288
3427fa8a 3289 if(resp) {
3427fa8a
BH
3290 ret += *resp;
3291 delete resp;
49467864 3292 resp = nullptr;
3427fa8a
BH
3293 }
3294 }
3295 return ret;
3296}
3297
b4e76a18
RG
3298template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3299template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3300template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3301template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
5ac6d761 3302template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3427fa8a 3303
d187038c 3304static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3305{
fbfc1809
RG
3306 try {
3307 string remote;
3308 string msg=s_rcc.recv(&remote);
3309 RecursorControlParser rcp;
3310 RecursorControlParser::func_t* command;
3ddb9247 3311
fbfc1809 3312 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0 3313
fbfc1809
RG
3314 // If we are inside a chroot, we need to strip
3315 if (!arg()["chroot"].empty()) {
3316 size_t len = arg()["chroot"].length();
3317 remote = remote.substr(len);
3318 }
f0f3f0b0 3319
ab5c053d
BH
3320 s_rcc.send(answer, &remote);
3321 command();
3322 }
fbfc1809 3323 catch(const std::exception& e) {
e6a9dde5 3324 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3325 }
fbfc1809 3326 catch(const PDNSException& ae) {
e6a9dde5 3327 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3328 }
09e6702a
BH
3329}
3330
d187038c 3331static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3332{
0b18b22e 3333 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 3334 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3335
667f7e60 3336 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3337
a683e8bd 3338 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3339 if(ret > 0) {
667f7e60 3340 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3341 pident->inNeeded-=(size_t)ret;
825fa717 3342 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3343 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3344 PacketID pid=*pident;
3345 string msg=pident->inMSG;
3ddb9247 3346
bb4bdbaf 3347 t_fdm->removeReadFD(fd);
3ddb9247 3348 MT->sendEvent(pid, &msg);
09e6702a
BH
3349 }
3350 else {
667f7e60 3351 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3352 }
3353 }
3354 else {
667f7e60 3355 PacketID tmp=*pident;
bb4bdbaf 3356 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3357 string empty;
3358 MT->sendEvent(tmp, &empty); // this conveys error status
3359 }
3360}
3361
d187038c 3362static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3363{
0b18b22e 3364 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3365 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3366 if(ret > 0) {
a683e8bd 3367 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3368 if(pid->outPos==pid->outMSG.size()) {
3369 PacketID tmp=*pid;
bb4bdbaf 3370 t_fdm->removeWriteFD(fd);
09e6702a
BH
3371 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3372 }
3373 }
3374 else { // error or EOF
667f7e60 3375 PacketID tmp(*pid);
bb4bdbaf 3376 t_fdm->removeWriteFD(fd);
09e6702a 3377 string sent;
998a4334 3378 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3379 }
3380}
3381
34801ab1 3382// resend event to everybody chained onto it
d187038c 3383static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3384{
3385 if(iter->key.chain.empty())
3386 return;
e27e91a8 3387 // cerr<<"doResends called!\n";
34801ab1
BH
3388 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3389 resend.fd=-1;
3390 resend.id=*i;
e27e91a8 3391 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3392
34801ab1
BH
3393 MT->sendEvent(resend, &content);
3394 g_stats.chainResends++;
34801ab1
BH
3395 }
3396}
3397
d187038c 3398static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3399{
600fc20b 3400 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3401 ssize_t len;
fae8fe07
RG
3402 std::string packet;
3403 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3404 ComboAddress fromaddr;
09e6702a
BH
3405 socklen_t addrlen=sizeof(fromaddr);
3406
fae8fe07 3407 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3408
a683e8bd 3409 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3410 if(len < 0)
996c89cc 3411 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3412 else {
3ddb9247 3413 g_stats.serverParseError++;
09e6702a 3414 if(g_logCommonErrors)
e6a9dde5 3415 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3416 ": packet smaller than DNS header"<<endl;
998a4334 3417 }
34801ab1 3418
49a699c4 3419 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3420 string empty;
3421
3422 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3423 if(iter != MT->d_waiters.end())
34801ab1 3424 doResends(iter, pid, empty);
3ddb9247 3425
34801ab1 3426 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3427 return;
3ddb9247 3428 }
998a4334 3429
fae8fe07 3430 packet.resize(len);
998a4334 3431 dnsheader dh;
fae8fe07 3432 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3433
6da3b3ad
PD
3434 PacketID pident;
3435 pident.remote=fromaddr;
3436 pident.id=dh.id;
3437 pident.fd=fd;
34801ab1 3438
33a928af 3439 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3440 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3441 }
3442
3443 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3444 !dh.qr) { // one weird server
3445 pident.domain.clear();
3446 pident.type = 0;
3447 }
3448 else {
3449 try {
0b31e67e 3450 if(len > 12)
fae8fe07 3451 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3452 }
3453 catch(std::exception& e) {
3454 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3455 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3456 return;
34801ab1 3457 }
6da3b3ad 3458 }
34801ab1 3459
6da3b3ad
PD
3460 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3461 if(iter != MT->d_waiters.end()) {
3462 doResends(iter, pident, packet);
3463 }
c1da7976 3464
6da3b3ad 3465retryWithName:
4957a608 3466
6da3b3ad 3467 if(!MT->sendEvent(pident, &packet)) {
9ec48f21
RG
3468 /* we did not find a match for this response, something is wrong */
3469
6da3b3ad
PD
3470 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3471 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3472 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3473 pident.domain == mthread->key.domain) {
6da3b3ad 3474 mthread->key.nearMisses++;
998a4334 3475 }
6da3b3ad
PD
3476
3477 // be a bit paranoid here since we're weakening our matching
3ddb9247 3478 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3479 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3480 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3481 pident.domain = mthread->key.domain;
3482 pident.type = mthread->key.type;
3483 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3484 }
09e6702a 3485 }
6da3b3ad
PD
3486 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3487 if(g_logCommonErrors) {
e6a9dde5 3488 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3489 }
09e6702a 3490 }
6da3b3ad 3491 else if(fd >= 0) {
9ec48f21 3492 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
6da3b3ad
PD
3493 t_udpclientsocks->returnSocket(fd);
3494 }
09e6702a
BH
3495}
3496
1f4abb20
BH
3497FDMultiplexer* getMultiplexer()
3498{
3499 FDMultiplexer* ret;
f26bf547 3500 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3501 try {
f26bf547 3502 ret=i.second();
1f4abb20
BH
3503 return ret;
3504 }
98d0ee4a 3505 catch(FDMultiplexerException &fe) {
e6a9dde5 3506 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3507 }
3508 catch(...) {
e6a9dde5 3509 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3510 }
1f4abb20 3511 }
e6a9dde5 3512 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3513 exit(1);
3514}
3515
3ddb9247 3516
d187038c 3517static string* doReloadLuaScript()
4485aa35 3518{
674cf0f6 3519 string fname= ::arg()["lua-dns-script"];
4485aa35 3520 try {
674cf0f6 3521 if(fname.empty()) {
f26bf547 3522 t_pdl.reset();
377602e3 3523 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3524 return new string("unloaded\n");
4485aa35
BH
3525 }
3526 else {
9694e14f
AT
3527 t_pdl = std::make_shared<RecursorLua4>();
3528 t_pdl->loadFile(fname);
4485aa35
BH
3529 }
3530 }
fdbf35ac 3531 catch(std::exception& e) {
e6a9dde5 3532 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3533 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3534 }
3ddb9247 3535
e6a9dde5 3536 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3537 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3538}
3539
49a699c4
BH
3540string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3541{
3ddb9247 3542 if(begin != end)
49a699c4 3543 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3544
0f39c1a3 3545 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3546}
49a699c4 3547
d187038c 3548static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3549try
3550{
3551 if(newRegex.empty()) {
f26bf547 3552 t_traceRegex.reset();
77499b05
BH
3553 return new string("unset\n");
3554 }
3555 else {
f26bf547 3556 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3557 return new string("ok\n");
3558 }
3559}
3f81d239 3560catch(PDNSException& ae)
77499b05
BH
3561{
3562 return new string(ae.reason+"\n");
3563}
3564
3565string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3566{
3567 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3568}
3569
4e9a20e6 3570static void checkLinuxIPv6Limits()
3571{
3572#ifdef __linux__
3573 string line;
3574 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3575 int lim=std::stoi(line);
4e9a20e6 3576 if(lim < 16384) {
e6a9dde5 3577 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3578 }
3579 }
3580#endif
3581}
36849ff2 3582static void checkOrFixFDS()
4e9a20e6 3583{
c0063e60 3584 unsigned int availFDs=getFilenumLimit();
3585 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3586
3587 if(wantFDs > availFDs) {
067ad20e 3588 unsigned int hardlimit= getFilenumLimit(true);
3589 if(hardlimit >= wantFDs) {
c0063e60 3590 setFilenumLimit(wantFDs);
e6a9dde5 3591 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3592 }
3593 else {
067ad20e 3594 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3595 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3596 g_maxMThreads = newval;
067ad20e 3597 setFilenumLimit(hardlimit);
36849ff2 3598 }
3599 }
4e9a20e6 3600}
77499b05 3601
c390b2da 3602static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3603
f26bf547 3604static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3605{
3606 t_allowFrom = ng;
f26bf547 3607 return nullptr;
49a699c4
BH
3608}
3609
dbd23fc2
BH
3610int g_argc;
3611char** g_argv;
3612
18af64a8 3613void parseACLs()
f7c1d4e3 3614{
18af64a8 3615 static bool l_initialized;
3ddb9247 3616
49a699c4 3617 if(l_initialized) { // only reload configuration file on second call
18af64a8 3618 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3619 if(::arg()["config-name"]!="") {
3620 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3621 }
18af64a8 3622 cleanSlashes(configname);
3ddb9247
PD
3623
3624 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3625 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3626 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3627 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3628 ::arg().preParse(g_argc, g_argv, "include-dir");
3629
3630 // then process includes
3631 std::vector<std::string> extraConfigs;
242b90e1
AT
3632 ::arg().gatherIncludes(extraConfigs);
3633
1dc8f4d0 3634 for(const std::string& fn : extraConfigs) {
7e818521 3635 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3636 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3637 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3638 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3639 }
ca2c884c
AT
3640
3641 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3642 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3643 }
49a699c4 3644
f26bf547
RG
3645 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3646 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3647
2c95fc65
BH
3648 if(!::arg()["allow-from-file"].empty()) {
3649 string line;
2c95fc65
BH
3650 ifstream ifs(::arg()["allow-from-file"].c_str());
3651 if(!ifs) {
9c61b9d0 3652 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3653 }
3654
3655 string::size_type pos;
3656 while(getline(ifs,line)) {
3657 pos=line.find('#');
3658 if(pos!=string::npos)
3659 line.resize(pos);
3660 trim(line);
3661 if(line.empty())
3662 continue;
3663
18af64a8 3664 allowFrom->addMask(line);
2c95fc65 3665 }
e6a9dde5 3666 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3667 }
3668 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3669 vector<string> ips;
3670 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3671
e6a9dde5 3672 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3673 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3674 allowFrom->addMask(*i);
f7c1d4e3 3675 if(i!=ips.begin())
e6a9dde5
PL
3676 g_log<<Logger::Warning<<", ";
3677 g_log<<Logger::Warning<<*i;
f7c1d4e3 3678 }
e6a9dde5 3679 g_log<<Logger::Warning<<endl;
f7c1d4e3 3680 }
49a699c4 3681 else {
3ddb9247 3682 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3683 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3684 allowFrom = nullptr;
49a699c4 3685 }
3ddb9247 3686
49a699c4 3687 g_initialAllowFrom = allowFrom;
d7dae798 3688 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3689 oldAllowFrom = nullptr;
3ddb9247 3690
49a699c4 3691 l_initialized = true;
18af64a8
BH
3692}
3693
795215f2 3694
756e82cf 3695static void setupDelegationOnly()
3696{
3697 vector<string> parts;
3698 stringtok(parts, ::arg()["delegation-only"], ", \t");
3699 for(const auto& p : parts) {
9065eb05 3700 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3701 }
3702}
795215f2 3703
8fd25133
RG
3704static std::map<unsigned int, std::set<int> > parseCPUMap()
3705{
3706 std::map<unsigned int, std::set<int> > result;
3707
3708 const std::string value = ::arg()["cpu-map"];
3709
3710 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3711 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3712 return result;
3713 }
3714
3715 std::vector<std::string> parts;
3716
3717 stringtok(parts, value, " \t");
3718
3719 for(const auto& part : parts) {
3720 if (part.find('=') == string::npos)
3721 continue;
3722
3723 try {
3724 auto headers = splitField(part, '=');
3725 trim(headers.first);
3726 trim(headers.second);
3727
3728 unsigned int threadId = pdns_stou(headers.first);
3729 std::vector<std::string> cpus;
3730
3731 stringtok(cpus, headers.second, ",");
3732
3733 for(const auto& cpu : cpus) {
3734 int cpuId = std::stoi(cpu);
3735
3736 result[threadId].insert(cpuId);
3737 }
3738 }
3739 catch(const std::exception& e) {
e6a9dde5 3740 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3741 }
3742 }
3743
3744 return result;
3745}
3746
3747static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3748{
3749 const auto& cpuMapping = cpusMap.find(n);
3750 if (cpuMapping != cpusMap.cend()) {
3751 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3752 if (rc == 0) {
e6a9dde5 3753 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3754 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3755 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3756 }
e6a9dde5 3757 g_log<<Logger::Info<<endl;
8fd25133
RG
3758 }
3759 else {
e6a9dde5 3760 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3761 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3762 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3763 }
e6a9dde5 3764 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3765 }
3766 }
3767}
3768
af1377b7
NC
3769#ifdef NOD_ENABLED
3770static void setupNODThread()
3771{
3772 if (g_nodEnabled) {
b78727c6
NC
3773 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3774 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3775 try {
3776 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3777 }
3778 catch (const PDNSException& e) {
3779 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3780 _exit(1);
3781 }
3782 if (!t_nodDBp->init()) {
3783 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3784 _exit(1);
3785 }
41c542ec 3786 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 3787 t.detach();
ca2526f5 3788 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
3789 }
3790 if (g_udrEnabled) {
b78727c6
NC
3791 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3792 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3793 try {
3794 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3795 }
3796 catch (const PDNSException& e) {
3797 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3798 _exit(1);
3799 }
3800 if (!t_udrDBp->init()) {
3801 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3802 _exit(1);
3803 }
3804 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 3805 t.detach();
ca2526f5 3806 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
3807 }
3808}
3809
3810void parseNODWhitelist(const std::string& wlist)
3811{
3812 vector<string> parts;
3813 stringtok(parts, wlist, ",; ");
3814 for(const auto& a : parts) {
3815 g_nodDomainWL.add(DNSName(a));
3816 }
3817}
3818
3819static void setupNODGlobal()
3820{
3821 // Setup NOD subsystem
3822 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3823 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3824 g_nodLog = ::arg().mustDo("new-domain-log");
3825 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3826
3827 // Setup Unique DNS Response subsystem
3828 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3829 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3830}
3831#endif /* NOD_ENABLED */
3832
c6042a88 3833static void checkSocketDir(void)
0127f6bd
OM
3834{
3835 struct stat st;
3836 string dir(::arg()["socket-dir"]);
3837 string msg;
c6042a88 3838
0127f6bd
OM
3839 if (stat(dir.c_str(), &st) == -1) {
3840 msg = "it does not exist or cannot access";
3841 }
3842 else if (!S_ISDIR(st.st_mode)) {
3843 msg = "it is not a directory";
3844 }
3845 else if (access(dir.c_str(), R_OK | W_OK | X_OK) != 0) {
3846 msg = "cannot read, write or search";
3847 } else {
3848 return;
3849 }
3850 g_log << Logger::Error << "Problem with socket directory " << dir << ": " << msg << "; see https://docs.powerdns.com/recursor/upgrade.html#x-to-4-3-0-or-master" << endl;
3851 _exit(1);
3852}
3853
d187038c 3854static int serviceMain(int argc, char*argv[])
18af64a8 3855{
e6a9dde5
PL
3856 g_log.setName(s_programname);
3857 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3858 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3859
3860 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3861 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3862 if(val >= 0)
e6a9dde5 3863 g_log.setFacility(val);
18af64a8 3864 else
e6a9dde5 3865 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3866 }
3867
ba1a571d 3868 showProductVersion();
3afde9b2 3869
06ea9015 3870 g_disthashseed=dns_random(0xffffffff);
3871
b7ef5828
PL
3872 checkLinuxIPv6Limits();
3873 try {
3874 vector<string> addrs;
3875 if(!::arg()["query-local-address6"].empty()) {
3876 SyncRes::s_doIPv6=true;
e6a9dde5 3877 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3878
3879 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3880 for(const string& addr : addrs) {
3881 g_localQueryAddresses6.push_back(ComboAddress(addr));
3882 }
3883 }
3884 else {
e6a9dde5 3885 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3886 }
3887 addrs.clear();
3888 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3889 for(const string& addr : addrs) {
3890 g_localQueryAddresses4.push_back(ComboAddress(addr));
3891 }
3892 }
3893 catch(std::exception& e) {
e6a9dde5 3894 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3895 exit(99);
3896 }
3897
e48c6b8a
PL
3898 // keep this ABOVE loadRecursorLuaConfig!
3899 if(::arg()["dnssec"]=="off")
3900 g_dnssecmode=DNSSECMode::Off;
3901 else if(::arg()["dnssec"]=="process-no-validate")
3902 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3903 else if(::arg()["dnssec"]=="process")
3904 g_dnssecmode=DNSSECMode::Process;
3905 else if(::arg()["dnssec"]=="validate")
3906 g_dnssecmode=DNSSECMode::ValidateAll;
3907 else if(::arg()["dnssec"]=="log-fail")
3908 g_dnssecmode=DNSSECMode::ValidateForLog;
3909 else {
e6a9dde5 3910 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3911 exit(1);
3912 }
3913
9a3ab3e4
KM
3914 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3915 if (g_signatureInceptionSkew < 0) {
3916 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3917 exit(1);
3918 }
3919
e48c6b8a 3920 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3921 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3922
a6f7f5fe 3923 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3924 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3925
3926 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3927 try {
e6ec15bf 3928 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3929 }
3930 catch (PDNSException &e) {
e6a9dde5 3931 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3932 exit(1);
3933 }
ad42489c 3934
18af64a8 3935 parseACLs();
d6f3fcfa 3936 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 3937
eb5bae86 3938 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3939 vector<string> ips;
3940 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3941 ips.push_back("0.0.0.0");
3942 ips.push_back("::");
c36bc97a 3943
e6a9dde5 3944 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3945 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3946 SyncRes::addDontQuery(*i);
eb5bae86 3947 if(i!=ips.begin())
e6a9dde5
PL
3948 g_log<<Logger::Warning<<", ";
3949 g_log<<Logger::Warning<<*i;
eb5bae86 3950 }
e6a9dde5 3951 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3952 }
3953
f7c1d4e3 3954 g_quiet=::arg().mustDo("quiet");
3ddb9247 3955
b243ca3b 3956 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3957 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3958 if(g_weDistributeQueries) {
b243ca3b 3959 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3960 }
3ddb9247 3961
756e82cf 3962 setupDelegationOnly();
b33c2462 3963 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3964
77499b05
BH
3965 if(::arg()["trace"]=="fail") {
3966 SyncRes::setDefaultLogMode(SyncRes::Store);
3967 }
3968 else if(::arg().mustDo("trace")) {
3969 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3970 ::arg().set("quiet")="no";
3971 g_quiet=false;
3e9c6c0a 3972 g_dnssecLOG=true;
f7c1d4e3 3973 }
43a9b290
PL
3974 string myHostname = getHostname();
3975 if (myHostname == "UNKNOWN"){
3976 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3977 myHostname = "";
d0983bff 3978 }
3ddb9247 3979
aadceba8 3980 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
5cf4b2e7 3981 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
aadceba8 3982
1051f8a9
BH
3983 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3984
f7c1d4e3 3985 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
b9473937 3986 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
63637fd8 3987 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3988 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3989 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3990 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3991 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3992 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3993 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3994 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3995 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3996 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3997 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3998 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3999 if(SyncRes::s_serverID.empty()) {
d0983bff 4000 SyncRes::s_serverID = myHostname;
f7c1d4e3 4001 }
3ddb9247 4002
e9f9b8ec
RG
4003 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
4004 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
c9783016 4005 SyncRes::clearECSStats();
fd8898fb 4006 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
4007 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
ed9019c9 4008 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
e9f9b8ec 4009
116d1288 4010 SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
d40a915b 4011
409b8398
OM
4012 if (SyncRes::s_qnameminimization) {
4013 // With an empty cache, a rev ipv6 query with dnssec enabled takes
4014 // almost 100 queries. Default maxqperq is 60.
13c43bdd 4015 SyncRes::s_maxqperq = std::max(SyncRes::s_maxqperq, static_cast<unsigned int>(100));
409b8398
OM
4016 }
4017
d40a915b
OM
4018 SyncRes::s_hardenNXD = SyncRes::HardenNXD::DNSSEC;
4019 string value = ::arg()["nothing-below-nxdomain"];
4020 if (value == "yes") {
4021 SyncRes::s_hardenNXD = SyncRes::HardenNXD::Yes;
4022 } else if (value == "no") {
4023 SyncRes::s_hardenNXD = SyncRes::HardenNXD::No;
4024 } else if (value != "dnssec") {
4025 g_log << Logger::Error << "Unknown nothing-below-nxdomain mode: " << value << endl;
4026 exit(1);
4027 }
116d1288 4028
8a3a3822
RG
4029 if (!::arg().isEmpty("ecs-scope-zero-address")) {
4030 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
4031 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
4032 }
4033 else {
4034 bool found = false;
4035 for (const auto& addr : g_localQueryAddresses4) {
4036 if (!IsAnyAddress(addr)) {
4037 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
4038 found = true;
4039 break;
4040 }
4041 }
4042 if (!found) {
4043 for (const auto& addr : g_localQueryAddresses6) {
4044 if (!IsAnyAddress(addr)) {
4045 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
4046 found = true;
4047 break;
4048 }
4049 }
4050 if (!found) {
4051 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
4052 }
4053 }
4054 }
4055
2fe3354d
CH
4056 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
4057 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
4058 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
4059
5cc8371b 4060 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 4061 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 4062
5b0ddd18 4063 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 4064
49a699c4 4065 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 4066
08f3f638 4067 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 4068
f7c1d4e3 4069 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 4070 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
4071
4072 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
4073 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
4074
b3adda56
PD
4075 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
4076
b243ca3b 4077 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 4078 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 4079 if (g_numWorkerThreads < 1) {
e6a9dde5 4080 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
4081 g_numWorkerThreads = 1;
4082 }
4083
b243ca3b 4084 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
4085 g_maxMThreads = ::arg().asNum("max-mthreads");
4086
c51c551e
OM
4087
4088 int64_t maxInFlight = ::arg().asNum("max-concurrent-requests-per-tcp-connection");
4089 if (maxInFlight < 1 || maxInFlight > USHRT_MAX || maxInFlight >= g_maxMThreads) {
4090 g_log<<Logger::Warning<<"Asked to run with illegal max-concurrent-requests-per-tcp-connection, setting to default (10)"<<endl;
4091 TCPConnection::s_maxInFlight = 10;
4092 } else {
4093 TCPConnection::s_maxInFlight = maxInFlight;
4094 }
4095
4096
00b8cadc
RG
4097 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
4098
0ec489bf 4099 g_statisticsInterval = ::arg().asNum("statistics-interval");
4100
559b6c93
PL
4101 {
4102 SuffixMatchNode dontThrottleNames;
4103 vector<string> parts;
52858314 4104 stringtok(parts, ::arg()["dont-throttle-names"], " ,");
559b6c93
PL
4105 for (const auto &p : parts) {
4106 dontThrottleNames.add(DNSName(p));
4107 }
d514bd03 4108 g_dontThrottleNames.setState(std::move(dontThrottleNames));
559b6c93
PL
4109
4110 NetmaskGroup dontThrottleNetmasks;
52858314 4111 stringtok(parts, ::arg()["dont-throttle-netmasks"], " ,");
559b6c93
PL
4112 for (const auto &p : parts) {
4113 dontThrottleNetmasks.addMask(Netmask(p));
4114 }
d514bd03 4115 g_dontThrottleNetmasks.setState(std::move(dontThrottleNetmasks));
559b6c93
PL
4116 }
4117
144040be 4118 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
078be17f
RG
4119 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
4120 s_balancingFactor = 0.0;
4121 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
4122 }
144040be 4123
810ff705
RG
4124#ifdef SO_REUSEPORT
4125 g_reusePort = ::arg().mustDo("reuseport");
4126#endif
4127
b243ca3b 4128 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 4129
b243ca3b
RG
4130 if (g_reusePort) {
4131 if (g_weDistributeQueries) {
4132 /* first thread is the handler, then distributors */
4133 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4134 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 4135 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 4136 makeUDPServerSockets(deferredAdds);
adb6cd72 4137 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
4138 }
4139 }
4140 else {
4141 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4142 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4143 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 4144 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 4145 makeUDPServerSockets(deferredAdds);
adb6cd72 4146 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 4147 }
810ff705
RG
4148 }
4149 }
4150 else {
c47f201b 4151 std::set<int> tcpSockets;
b243ca3b
RG
4152 /* we don't have reuseport so we can only open one socket per
4153 listening addr:port and everyone will listen on it */
4154 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
4155 makeTCPServerSockets(g_deferredAdds, tcpSockets);
4156
4157 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
4158 needs to listen to the shared sockets */
4159 if (g_weDistributeQueries) {
4160 /* first thread is the handler, then distributors */
4161 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4162 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4163 }
4164 }
4165 else {
4166 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4167 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4168 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4169 }
4170 }
810ff705 4171 }
815099b2 4172
af1377b7
NC
4173#ifdef NOD_ENABLED
4174 // Setup newly observed domain globals
4175 setupNODGlobal();
4176#endif /* NOD_ENABLED */
4177
677e2a46
BH
4178 int forks;
4179 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
4180 if(!fork()) // we are child
4181 break;
4182 }
3ddb9247 4183
f7c1d4e3 4184 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
4185 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
4186 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
4187 daemonize();
4188 }
cd180a71 4189 if(Utility::getpid() == 1) {
807db6c8
FL
4190 /* We are running as pid 1, register sigterm and sigint handler
4191
4192 The Linux kernel will handle SIGTERM and SIGINT for all processes, except PID 1.
4193 It assumes that the process running as pid 1 is an "init" like system.
4194 For years, this was a safe assumption, but containers change that: in
4195 most (all?) container implementations, the application itself is running
4196 as pid 1. This means that sending signals to those applications, will not
dda61e20
FL
4197 be handled by default. Results might be "your container not responding
4198 when asking it to stop", or "ctrl-c not working even when the app is
4199 running in the foreground inside a container".
807db6c8
FL
4200
4201 So TL;DR: If we're running pid 1 (container), we should handle SIGTERM and SIGINT ourselves */
4202
cd180a71
FL
4203 signal(SIGTERM,termIntHandler);
4204 signal(SIGINT,termIntHandler);
4205 }
4206
f7c1d4e3
BH
4207 signal(SIGUSR1,usr1Handler);
4208 signal(SIGUSR2,usr2Handler);
4209 signal(SIGPIPE,SIG_IGN);
810ff705 4210
a6414fdc 4211 checkOrFixFDS();
3ddb9247 4212
d1b28475
KM
4213#ifdef HAVE_LIBSODIUM
4214 if (sodium_init() == -1) {
e6a9dde5 4215 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
4216 exit(99);
4217 }
4218#endif
4219
3afde9b2
PL
4220 openssl_thread_setup();
4221 openssl_seed();
e97cb679
AT
4222 /* setup rng before chroot */
4223 dns_random_init();
3afde9b2 4224
bdbb07e0 4225 if(::arg()["server-id"].empty()) {
d0983bff 4226 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
4227 }
4228
138435cb
BH
4229 int newgid=0;
4230 if(!::arg()["setgid"].empty())
2211dac9 4231 newgid = strToGID(::arg()["setgid"]);
138435cb
BH
4232 int newuid=0;
4233 if(!::arg()["setuid"].empty())
2211dac9 4234 newuid = strToUID(::arg()["setuid"]);
138435cb 4235
f1d6a7ce
KM
4236 Utility::dropGroupPrivs(newuid, newgid);
4237
138435cb 4238 if (!::arg()["chroot"].empty()) {
75336810
PL
4239#ifdef HAVE_SYSTEMD
4240 char *ns;
4241 ns = getenv("NOTIFY_SOCKET");
4242 if (ns != nullptr) {
e6a9dde5 4243 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
4244 exit(1);
4245 }
4246#endif
138435cb 4247 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
a2a81d42
OM
4248 int err = errno;
4249 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (err)<<", exiting"<<endl;
4250 exit(1);
138435cb 4251 }
f0f3f0b0 4252 else
377602e3 4253 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
4254 }
4255
c6042a88
OM
4256 checkSocketDir();
4257
f0f3f0b0
PL
4258 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4259 if(!s_pidfname.empty())
4260 unlink(s_pidfname.c_str()); // remove possible old pid file
4261 writePid();
4262
4263 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4264
f1d6a7ce 4265 Utility::dropUserPrivs(newuid);
1f2b341e
RG
4266 try {
4267 /* we might still have capabilities remaining, for example if we have been started as root
4268 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4269 like CAP_NET_BIND_SERVICE.
4270 */
4271 dropCapabilities();
4272 }
4273 catch(const std::exception& e) {
4274 g_log<<Logger::Warning<<e.what()<<endl;
4275 }
c0063e60 4276
e6ec15bf
RG
4277 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4278
49a699c4 4279 makeThreadPipes();
3ddb9247 4280
5d4dd7fe
BH
4281 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4282 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 4283 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 4284 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 4285
c29d820c
RG
4286 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4287
563517f3
RG
4288 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4289 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4290 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4291 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
72259676 4292
d705aad9
RG
4293 if (::arg().mustDo("snmp-agent")) {
4294 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4295 g_snmpAgent->run();
4296 }
4297
b47026fd 4298 int port = ::arg().asNum("udp-source-port-min");
58da9034 4299 if(port < 1024 || port > 65535){
e6a9dde5 4300 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
4301 exit(99); // this isn't going to fix itself either
4302 }
4303 s_minUdpSourcePort = port;
b47026fd 4304 port = ::arg().asNum("udp-source-port-max");
58da9034 4305 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 4306 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
4307 exit(99); // this isn't going to fix itself either
4308 }
4309 s_maxUdpSourcePort = port;
4310 std::vector<string> parts {};
b47026fd 4311 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
4312 for (const auto &part : parts)
4313 {
4314 port = std::stoi(part);
58da9034 4315 if(port < 1024 || port > 65535){
e6a9dde5 4316 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
4317 exit(99); // this isn't going to fix itself either
4318 }
4319 s_avoidUdpSourcePorts.insert(port);
4320 }
4321
b243ca3b 4322 unsigned int currentThreadId = 1;
8fd25133 4323 const auto cpusMap = parseCPUMap();
d77abca1 4324
c3828c03 4325 if(g_numThreads == 1) {
e6a9dde5 4326 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
4327#ifdef HAVE_SYSTEMD
4328 sd_notify(0, "READY=1");
4329#endif
b243ca3b
RG
4330
4331 /* This thread handles the web server, carbon, statistics and the control channel */
4332 auto& handlerInfos = s_threadInfos.at(0);
4333 handlerInfos.isHandler = true;
c390b2da 4334 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
4335
4336 setCPUMap(cpusMap, currentThreadId, pthread_self());
4337
4338 auto& infos = s_threadInfos.at(currentThreadId);
4339 infos.isListener = true;
4340 infos.isWorker = true;
c390b2da 4341 recursorThread(currentThreadId++, "worker");
8657c2af
OM
4342
4343 handlerInfos.thread.join();
76698c6e
BH
4344 }
4345 else {
8fd25133 4346
ef31b090
OM
4347
4348 if (g_weDistributeQueries) {
4349 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4350 auto& infos = s_threadInfos.at(currentThreadId + n);
4351 infos.isListener = true;
4352 }
4353 }
4354 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4355 auto& infos = s_threadInfos.at(currentThreadId + (g_weDistributeQueries ? g_numDistributorThreads : 0) + n);
4356 infos.isListener = !g_weDistributeQueries;
4357 infos.isWorker = true;
4358 }
4359
b243ca3b
RG
4360 if (g_weDistributeQueries) {
4361 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4362 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4363 auto& infos = s_threadInfos.at(currentThreadId);
c390b2da 4364 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
4365 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4366 }
4367 }
8fd25133 4368
62b549e0
RG
4369 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4370
b243ca3b
RG
4371 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4372 auto& infos = s_threadInfos.at(currentThreadId);
c390b2da 4373 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b 4374 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 4375 }
b243ca3b 4376
6b6720de
PL
4377#ifdef HAVE_SYSTEMD
4378 sd_notify(0, "READY=1");
4379#endif
b243ca3b
RG
4380
4381 /* This thread handles the web server, carbon, statistics and the control channel */
4382 auto& infos = s_threadInfos.at(0);
4383 infos.isHandler = true;
c390b2da 4384 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b 4385
8657c2af
OM
4386 for (auto & ti : s_threadInfos) {
4387 ti.thread.join();
4388 }
bb4bdbaf 4389 }
da966ae0
OM
4390
4391#ifdef HAVE_PROTOBUF
4392 google::protobuf::ShutdownProtobufLibrary();
4393#endif /* HAVE_PROTOBUF */
bb4bdbaf
BH
4394 return 0;
4395}
4396
c390b2da 4397static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
4398try
4399{
d77abca1 4400 t_id=n;
b243ca3b 4401 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
4402
4403 static string threadPrefix = "pdns-r/";
519f5484 4404 setThreadName(threadPrefix + threadName);
c390b2da 4405
49a699c4 4406 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 4407 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 4408 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
4409 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4410 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 4411 primeHints();
3ddb9247 4412
f26bf547 4413 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 4414
e6a9dde5 4415 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 4416
af1377b7 4417#ifdef NOD_ENABLED
41c542ec
NC
4418 if (threadInfo.isWorker)
4419 setupNODThread();
af1377b7 4420#endif /* NOD_ENABLED */
c1751a59
RG
4421
4422 /* the listener threads handle TCP queries */
4423 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
4424 try {
4425 if(!::arg()["lua-dns-script"].empty()) {
4426 t_pdl = std::make_shared<RecursorLua4>();
4427 t_pdl->loadFile(::arg()["lua-dns-script"]);
4428 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4429 }
4430 }
4431 catch(std::exception &e) {
4432 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4433 _exit(99);
674cf0f6 4434 }
674cf0f6 4435 }
3ddb9247 4436
f8f243b0 4437 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 4438 if(ringsize) {
f26bf547 4439 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
4440 if(g_weDistributeQueries)
4441 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 4442 else
3ddb9247 4443 t_remotes->set_capacity(ringsize);
f26bf547 4444 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4445 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
4446 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4447 t_bogusremotes->set_capacity(ringsize);
f26bf547 4448 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4449 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 4450 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4451 t_timeouts->set_capacity(ringsize);
92011b8f 4452
f26bf547 4453 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4454 t_queryring->set_capacity(ringsize);
f26bf547 4455 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4456 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
4457 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4458 t_bogusqueryring->set_capacity(ringsize);
92011b8f 4459 }
3ddb9247 4460
f26bf547 4461 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
144040be 4462 threadInfo.mt = MT.get();
3ddb9247 4463
63341e8d
RG
4464#ifdef HAVE_PROTOBUF
4465 /* start protobuf export threads if needed */
4466 auto luaconfsLocal = g_luaconfs.getLocal();
4467 checkProtobufExport(luaconfsLocal);
4468 checkOutgoingProtobufExport(luaconfsLocal);
4469#endif /* HAVE_PROTOBUF */
b9fa43e0
OM
4470#ifdef HAVE_FSTRM
4471 checkFrameStreamExport(luaconfsLocal);
4472#endif
63341e8d 4473
bb4bdbaf
BH
4474 PacketID pident;
4475
4476 t_fdm=getMultiplexer();
d77abca1 4477
da966ae0
OM
4478 RecursorWebServer *rws = nullptr;
4479
b243ca3b 4480 if(threadInfo.isHandler) {
d07bf7ff 4481 if(::arg().mustDo("webserver")) {
e6a9dde5 4482 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 4483 try {
da966ae0 4484 rws = new RecursorWebServer(t_fdm);
8989097d
CH
4485 }
4486 catch(PDNSException &e) {
e6a9dde5 4487 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
4488 exit(99);
4489 }
f3d1d67b 4490 }
377602e3 4491 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 4492 }
810ff705 4493 else {
d77abca1 4494
b243ca3b
RG
4495 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4496 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4497
4498 if (threadInfo.isListener) {
4499 if (g_reusePort) {
4500 /* then every listener has its own FDs */
4501 for(const auto deferred : threadInfo.deferredAdds) {
4502 t_fdm->addReadFD(deferred.first, deferred.second);
4503 }
810ff705 4504 }
b243ca3b
RG
4505 else {
4506 /* otherwise all listeners are listening on the same ones */
4507 for(const auto deferred : g_deferredAdds) {
4508 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4509 }
4510 }
4511 }
810ff705 4512 }
3ddb9247 4513
b0b37121 4514 registerAllStats();
d77abca1 4515
b243ca3b 4516 if(threadInfo.isHandler) {
674cf0f6
BH
4517 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4518 }
1bc3c142 4519
f7c1d4e3 4520 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4521
f7c1d4e3 4522 bool listenOnTCP(true);
49a699c4 4523
cb1523d1 4524 time_t last_stat = 0;
a2f87dd1 4525 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4526 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4527 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 4528 counter.store(0); // used to periodically execute certain tasks
8657c2af
OM
4529
4530 while (!RecursorControlChannel::stop) {
ac0e821b 4531 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4532
3427fa8a
BH
4533 if(!(counter%500)) {
4534 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4535 }
4536
d2392145 4537 if(!(counter%55)) {
d8f6d49f 4538 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4539 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4540
f7c1d4e3 4541 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 4542 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4543 if(g_logCommonErrors)
e6a9dde5 4544 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4545 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4546 }
4547 }
3ddb9247 4548
f7c1d4e3
BH
4549 counter++;
4550
b243ca3b 4551 if(threadInfo.isHandler) {
cb1523d1
RG
4552 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4553 doStats();
4554 last_stat = g_now.tv_sec;
4555 }
f7c1d4e3 4556
cb1523d1 4557 Utility::gettimeofday(&g_now, 0);
2c78bd57 4558
cb1523d1
RG
4559 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4560 MT->makeThread(doCarbonDump, 0);
4561 last_carbon = g_now.tv_sec;
4562 }
2c78bd57 4563 }
2a0276a9 4564 if (t_pdl != nullptr) {
9adbe790 4565 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
4566 /* remember that the listener threads handle TCP queries */
4567 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
4568 // Only on threads processing queries
4569 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4570 t_pdl->maintenance();
4571 last_lua_maintenance = g_now.tv_sec;
4572 }
9adbe790 4573 }
a2f87dd1 4574 }
2c78bd57 4575
bb4bdbaf 4576 t_fdm->run(&g_now);
3ea54bf0 4577 // 'run' updates g_now for us
f7c1d4e3 4578
b243ca3b 4579 if(threadInfo.isListener) {
5c889cf5 4580 if(listenOnTCP) {
c47f201b
RG
4581 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4582 for(const auto fd : threadInfo.tcpSockets) {
4583 t_fdm->removeReadFD(fd);
b243ca3b 4584 }
c47f201b
RG
4585 listenOnTCP=false;
4586 }
f7c1d4e3 4587 }
5c889cf5 4588 else {
c47f201b
RG
4589 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4590 for(const auto fd : threadInfo.tcpSockets) {
4591 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4592 }
c47f201b
RG
4593 listenOnTCP=true;
4594 }
f7c1d4e3
BH
4595 }
4596 }
4597 }
da966ae0
OM
4598 delete rws;
4599 delete t_fdm;
8657c2af 4600 return 0;
f7c1d4e3 4601}
3f81d239 4602catch(PDNSException &ae) {
e6a9dde5 4603 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4604 return 0;
4605}
4606catch(std::exception &e) {
e6a9dde5 4607 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4608 return 0;
4609}
4610catch(...) {
e6a9dde5 4611 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4612 return 0;
4613}
4614
51e2144e 4615
3ddb9247 4616int main(int argc, char **argv)
288f4aa9 4617{
dbd23fc2
BH
4618 g_argc = argc;
4619 g_argv = argv;
5e3de507 4620 g_stats.startupTime=time(0);
b51ef4f9 4621 Utility::srandom();
3e135495 4622 versionSetProduct(ProductRecursor);
8a63d3ce 4623 reportBasicTypes();
0007c2e5 4624 reportOtherTypes();
ea634573 4625
22030c37 4626 int ret = EXIT_SUCCESS;
caa6eefa 4627
288f4aa9 4628 try {
f888311c 4629 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4630 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4631 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4632 ::arg().set("local-port","port to listen on")="53";
32252594 4633 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4634 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4635 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4636 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4637 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4638 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4639 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4640 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4641 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4642 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4643 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4644 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19 4645 ::arg().set("chroot","switch to chroot jail")="";
fe9e7b8d
PL
4646 ::arg().set("setgid","If set, change group id to this gid for more security"
4647#ifdef HAVE_SYSTEMD
4648#define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
4649 SYSTEMD_SETID_MSG
4650#endif
4651 )="";
4652 ::arg().set("setuid","If set, change user id to this uid for more security"
4653#ifdef HAVE_SYSTEMD
4654 SYSTEMD_SETID_MSG
4655#endif
4656 )="";
c83ee49d 4657 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4658 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4659 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4660 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4661 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4662 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4663 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4664 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4665 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4666 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4667 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4668 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
8ca656a8 4669 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
cc08b5a9 4670 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4671 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4672 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4673 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4674 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4675
0ec489bf 4676 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4677 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4678 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4679 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4680 ::arg().set("socket-owner","Owner of socket")="";
4681 ::arg().set("socket-group","Group of socket")="";
4682 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4683
0524add9 4684 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+"/pdns-recursor when unset and not chrooted" )="";
2e3d8a19
BH
4685 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4686 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4687 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4688 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4689 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4690 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
87ff2287 4691 ::arg().set("max-concurrent-requests-per-tcp-connection", "Maximum number of requests handled concurrently per TCP connection") = "10";
324dc148 4692 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4693 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
559b6c93
PL
4694 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4695 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
2e3d8a19 4696 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4697 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4698 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
b9473937 4699 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
c3e753c7 4700 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4701 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4702 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4703 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4704 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4705 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4706 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4707 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4708 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4709 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4710 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4711 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4712 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4713 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4714 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4715 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4716 ::arg().set("lua-config-file", "More powerful configuration options")="";
0273d4ab 4717 ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
644dd1da 4718
5605c067 4719 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4720 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4721 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4722 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4723 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4724 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4725 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4726 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4727 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4728 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4729 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4730 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
fd8898fb 4731 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
35695d18 4732 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
fd8898fb 4733 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5cf4b2e7 4734 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
ed9019c9 4735 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
3f975863 4736 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4737 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4738 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4739 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4740 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4741 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4742 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4743 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4744 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
4745 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4746 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 4747 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
409b8398 4748 ::arg().set("max-qperq", "Maximum outgoing queries per query")="60";
c5950146 4749 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4750 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4751 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
c29d820c 4752 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
ee271fc4 4753 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
a09a8ce0 4754
68e6df3c 4755 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4756 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4757
4758 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4759
d705aad9 4760 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4761 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4762
72259676
RG
4763 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4764 for (size_t idx = 0; idx < 32; idx++) {
4765 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4766 }
4767 for (size_t idx = 0; idx < 128; idx++) {
4768 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4769 }
563517f3
RG
4770 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4771 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4772 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4773 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
d705aad9 4774
0735b17e 4775 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4776 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4777
8fd25133
RG
4778 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4779
98d36505
RG
4780 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4781
5cc8371b 4782 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4783 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4784
58da9034 4785 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4786 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4787 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4788 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 4789 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
144040be 4790 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
8949a3e0
OM
4791
4792 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="yes";
d40a915b 4793 ::arg().setSwitch("nothing-below-nxdomain", "When an NXDOMAIN exists in cache for a name with fewer labels than the qname, send NXDOMAIN without doing a lookup (see RFC 8020)")="dnssec";
ba3d53d1 4794 ::arg().set("max-generate-steps", "Maximum number of $GENERATE steps when loading a zone from a file")="0";
359d6c17 4795 ::arg().set("cache-shards", "Number of shards in the record cache")="1024";
d40a915b 4796
af1377b7
NC
4797#ifdef NOD_ENABLED
4798 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4799 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4800 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4801 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4802 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4803 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4804 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
4805 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4806 ::arg().set("unique-response-log", "Log unique responses")="yes";
4807 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4808 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4809 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 4810#endif /* NOD_ENABLED */
2e3d8a19 4811 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4812 ::arg().setCmd("version","Print version string");
d5141417 4813 ::arg().setCmd("config","Output blank configuration");
8864bdf6 4814 ::arg().setDefaults();
e6a9dde5 4815 g_log.toConsole(Logger::Info);
2e3d8a19 4816 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4817
2d733c0f
CH
4818 string configname=::arg()["config-dir"]+"/recursor.conf";
4819 if(::arg()["config-name"]!="") {
4820 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4821 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4822 }
4823 cleanSlashes(configname);
5124de27 4824
5cc1ea1d 4825 if(!::arg().getCommands().empty()) {
94ea3c7b
PL
4826 cerr<<"Fatal: non-option";
4827 if (::arg().getCommands().size() > 1) {
4828 cerr<<"s";
4829 }
4830 cerr<<" (";
4831 bool first = true;
4832 for (auto const c : ::arg().getCommands()) {
4833 if (!first) {
4834 cerr<<", ";
4835 }
4836 first = false;
4837 cerr<<c;
4838 }
4839 cerr<<") on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
5cc1ea1d
CH
4840 exit(99);
4841 }
4842
577cf284 4843 if(::arg().mustDo("config")) {
8864bdf6 4844 cout<<::arg().configstring(false, true);
577cf284
BH
4845 exit(0);
4846 }
4847
3ddb9247 4848 if(!::arg().file(configname.c_str()))
e6a9dde5 4849 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4850
2e3d8a19 4851 ::arg().parse(argc,argv);
c836dc19 4852
2054afbb
CH
4853 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4854 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4855 exit(EXIT_FAILURE);
4856 }
4857
4858 if (::arg()["socket-dir"].empty()) {
4859 if (::arg()["chroot"].empty())
0524add9 4860 ::arg().set("socket-dir") = std::string(LOCALSTATEDIR) + "/pdns-recursor";
f0f3f0b0
PL
4861 else
4862 ::arg().set("socket-dir") = "/";
4863 }
4864
2e3d8a19 4865 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4866
b243ca3b
RG
4867 if(::arg().asNum("threads")==1) {
4868 if (::arg().mustDo("pdns-distributes-queries")) {
4869 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4870 ::arg().set("pdns-distributes-queries")="no";
4871 }
4872 }
4873
4874 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4875 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4876 ::arg().set("distributor-threads")="1";
4877 }
4878
4879 if (!::arg().mustDo("pdns-distributes-queries")) {
4880 ::arg().set("distributor-threads")="0";
4881 }
61d74169 4882
2e3d8a19 4883 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4884 cout<<"syntax:"<<endl<<endl;
4885 cout<<::arg().helpstring(::arg()["help"])<<endl;
4886 exit(0);
b636533b 4887 }
5e3de507 4888 if(::arg().mustDo("version")) {
ba1a571d 4889 showProductVersion();
3613a51c 4890 showBuildConfiguration();
67076869 4891 exit(0);
5e3de507 4892 }
b636533b 4893
359d6c17
OM
4894 s_RC = std::unique_ptr<MemRecursorCache>(new MemRecursorCache(::arg().asNum("cache-shards")));
4895
34162f8f 4896 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4897
34162f8f
CH
4898 if (logUrgency < Logger::Error)
4899 logUrgency = Logger::Error;
f48d7b65 4900 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4901 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4902 }
e6a9dde5
PL
4903 g_log.setLoglevel(logUrgency);
4904 g_log.toConsole(logUrgency);
34162f8f 4905
f7c1d4e3 4906 serviceMain(argc, argv);
288f4aa9 4907 }
3f81d239 4908 catch(PDNSException &ae) {
e6a9dde5 4909 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4910 ret=EXIT_FAILURE;
288f4aa9 4911 }
fdbf35ac 4912 catch(std::exception &e) {
e6a9dde5 4913 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4914 ret=EXIT_FAILURE;
288f4aa9
BH
4915 }
4916 catch(...) {
e6a9dde5 4917 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4918 ret=EXIT_FAILURE;
288f4aa9 4919 }
3ddb9247 4920
22030c37 4921 return ret;
288f4aa9 4922}