]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
rec: Add a distribution-pipe-buffer-size setting
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
d6f3fcfa 91#include "pubsuffix.hh"
af1377b7
NC
92#ifdef NOD_ENABLED
93#include "nod.hh"
94#endif /* NOD_ENABLED */
f3c18728 95
d9d3f9c1 96#include "rec-protobuf.hh"
d705aad9 97#include "rec-snmp.hh"
aa7929a3 98
6b6720de
PL
99#ifdef HAVE_SYSTEMD
100#include <systemd/sd-daemon.h>
101#endif
102
d187038c
RG
103#include "namespaces.hh"
104
d61aa945
RG
105#ifdef HAVE_PROTOBUF
106#include "uuid-utils.hh"
107#endif
108
5cc8371b
RG
109#include "xpf.hh"
110
d187038c
RG
111typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
f26bf547 113static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 114static thread_local unsigned int t_id = 0;
f26bf547
RG
115static thread_local std::shared_ptr<Regex> t_traceRegex;
116static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 117#ifdef HAVE_PROTOBUF
3fe06137 118static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 119static thread_local uint64_t t_protobufServersGeneration;
3fe06137 120static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 121static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 122#endif /* HAVE_PROTOBUF */
f26bf547
RG
123
124thread_local std::unique_ptr<MT_t> MT; // the big MTasker
125thread_local std::unique_ptr<MemRecursorCache> t_RC;
126thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 127thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 128thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 129thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 130thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
131#ifdef NOD_ENABLED
132thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 133thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 134#endif /* NOD_ENABLED */
d187038c 135__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 136
b243ca3b
RG
137typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
138
d7dae798 139// for communicating with our threads
b243ca3b
RG
140// effectively readonly after startup
141struct RecThreadInfo
142{
143 struct ThreadPipeSet
144 {
145 int writeToThread{-1};
146 int readToThread{-1};
147 int writeFromThread{-1};
148 int readFromThread{-1};
149 int writeQueriesToThread{-1}; // this one is non-blocking
150 int readQueriesToThread{-1};
151 };
152
adb6cd72 153 /* FD corresponding to TCP sockets this thread is listening
c47f201b 154 on.
adb6cd72
RG
155 These FDs are also in deferredAdds when we have one
156 socket per listener, and in g_deferredAdds instead. */
157 std::set<int> tcpSockets;
b243ca3b
RG
158 /* FD corresponding to listening sockets if we have one socket per
159 listener (with reuseport), otherwise all listeners share the
160 same FD and g_deferredAdds is then used instead */
161 deferredAdd_t deferredAdds;
162 struct ThreadPipeSet pipes;
163 std::thread thread;
144040be
RG
164 MT_t* mt{nullptr};
165 uint64_t numberOfDistributedQueries{0};
b243ca3b
RG
166 /* handle the web server, carbon, statistics and the control channel */
167 bool isHandler{false};
168 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
169 bool isListener{false};
170 /* process queries */
171 bool isWorker{false};
49a699c4 172};
810ff705 173
b243ca3b
RG
174/* first we have the handler thread, t_id == 0 (some other
175 helper threads like SNMP might have t_id == 0 as well)
176 then the distributor threads if any
177 and finally the workers */
178static std::vector<RecThreadInfo> s_threadInfos;
179/* without reuseport, all listeners share the same sockets */
180static deferredAdd_t g_deferredAdds;
faf580f5 181
d187038c
RG
182typedef vector<int> tcpListenSockets_t;
183typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 184
d187038c 185static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 186static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
187static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
188static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
189static AtomicCounter counter;
9065eb05 190static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 191static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 192static NetmaskGroup g_XPFAcl;
d187038c 193static size_t g_tcpMaxQueriesPerConn;
a5886e6a 194static size_t s_maxUDPQueriesPerRound;
d187038c
RG
195static uint64_t g_latencyStatSize;
196static uint32_t g_disthashseed;
197static unsigned int g_maxTCPPerClient;
d187038c 198static unsigned int g_maxMThreads;
b243ca3b 199static unsigned int g_numDistributorThreads;
d187038c
RG
200static unsigned int g_numWorkerThreads;
201static int g_tcpTimeout;
202static uint16_t g_udpTruncationThreshold;
59cb4a79 203static uint16_t g_xpfRRCode{0};
d187038c
RG
204static std::atomic<bool> statsWanted;
205static std::atomic<bool> g_quiet;
206static bool g_logCommonErrors;
207static bool g_anyToTcp;
b243ca3b 208static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 209static bool g_reusePort{false};
00b8cadc 210static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 211static time_t g_statisticsInterval;
9065eb05 212static bool g_useIncomingECS;
c29d820c 213static bool g_useKernelTimestamp;
a6f7f5fe 214std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
215#ifdef NOD_ENABLED
216static bool g_nodEnabled;
217static DNSName g_nodLookupDomain;
218static bool g_nodLog;
219static SuffixMatchNode g_nodDomainWL;
ca2526f5 220static std::string g_nod_pbtag;
41c542ec
NC
221static bool g_udrEnabled;
222static bool g_udrLog;
ca2526f5 223static std::string g_udr_pbtag;
af1377b7 224#endif /* NOD_ENABLED */
f097141b 225#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 226static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
227#else
228static std::set<uint16_t> s_avoidUdpSourcePorts;
229#endif
bf6f28ca
CHB
230static uint16_t s_minUdpSourcePort;
231static uint16_t s_maxUdpSourcePort;
144040be 232static double s_balancingFactor;
49a699c4 233
b243ca3b 234RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 235RecursorStats g_stats;
2d733c0f 236string s_programname="pdns_recursor";
d187038c 237string s_pidfname;
c1c29961 238bool g_lowercaseOutgoing;
bf19ccfd 239unsigned int g_networkTimeoutMsec;
d187038c
RG
240unsigned int g_numThreads;
241uint16_t g_outgoingEDNSBufsize;
98d36505 242bool g_logRPZChanges{false};
c3828c03 243
559b6c93
PL
244// Used in the Syncres to not throttle certain servers
245GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
246GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
247
12cd44ee 248#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 249#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 250// Bad Nets taken from both:
3ddb9247 251// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 252// and
253// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
254// where such a network may not be considered a valid destination
255#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
256#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 257
d7dae798 258//! used to send information to a newborn mthread
ea634573 259struct DNSComboWriter {
08b02366 260 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
261 {
262 }
5cc8371b 263
08b02366 264 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
265 {
266 }
267
5cc8371b
RG
268 void setRemote(const ComboAddress& sa)
269 {
270 d_remote=sa;
271 }
272
273 void setSource(const ComboAddress& sa)
ea634573 274 {
5cc8371b 275 d_source=sa;
ea634573
BH
276 }
277
b71b60ee 278 void setLocal(const ComboAddress& sa)
279 {
280 d_local=sa;
281 }
282
5cc8371b
RG
283 void setDestination(const ComboAddress& sa)
284 {
285 d_destination=sa;
286 }
b71b60ee 287
ea634573
BH
288 void setSocket(int sock)
289 {
290 d_socket=sock;
291 }
a1754c6a
BH
292
293 string getRemote() const
294 {
5cc8371b
RG
295 if (d_source == d_remote) {
296 return d_source.toStringWithPort();
297 }
298 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
299 }
300
5cc8371b 301 MOADNSParser d_mdp;
c9e9e5e0 302 struct timeval d_now;
5cc8371b
RG
303 /* Remote client, might differ from d_source
304 in case of XPF, in which case d_source holds
305 the IP of the client and d_remote of the proxy
306 */
307 ComboAddress d_remote;
308 ComboAddress d_source;
309 /* Destination address, might differ from
310 d_destination in case of XPF, in which case
311 d_destination holds the IP of the proxy and
312 d_local holds our own. */
313 ComboAddress d_local;
314 ComboAddress d_destination;
aa7929a3
RG
315#ifdef HAVE_PROTOBUF
316 boost::uuids::uuid d_uuid;
67e31ebe 317 string d_requestorId;
590388d2 318 string d_deviceId;
c29d820c 319 struct timeval d_kernelTimestamp{0,0};
aa7929a3 320#endif
08b02366 321 std::string d_query;
5164bac3
RG
322 std::vector<std::string> d_policyTags;
323 LuaContext::LuaObject d_data;
b40562da 324 EDNSSubnetOpts d_ednssubnet;
5164bac3 325 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 326 int d_socket;
b673817a 327 unsigned int d_tag{0};
e9f63d47 328 uint32_t d_qhash{0};
70fb28d9 329 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
330 uint16_t d_ecsBegin{0};
331 uint16_t d_ecsEnd{0};
70fb28d9 332 bool d_variable{false};
5164bac3
RG
333 bool d_ecsFound{false};
334 bool d_ecsParsed{false};
335 bool d_tcp;
ea634573
BH
336};
337
06857845
RG
338MT_t* getMT()
339{
340 return MT ? MT.get() : nullptr;
341}
ea634573 342
288f4aa9
BH
343ArgvMap &arg()
344{
345 static ArgvMap theArg;
346 return theArg;
347}
4ef015cd 348
8fb594ba 349unsigned int getRecursorThreadId()
b4015453 350{
30da2030 351 return t_id;
b4015453 352}
09e6702a 353
30ee601a
RG
354int getMTaskerTID()
355{
356 return MT->getTid();
357}
358
b243ca3b
RG
359static bool isDistributorThread()
360{
361 if (t_id == 0) {
362 return false;
363 }
364
365 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
366}
367
368static bool isHandlerThread()
369{
370 if (t_id == 0) {
371 return true;
372 }
373
374 return s_threadInfos.at(t_id).isHandler;
375}
376
d187038c 377static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 378
50c81227 379// -1 is error, 0 is timeout, 1 is success
3ddb9247 380int asendtcp(const string& data, Socket* sock)
5c633640
BH
381{
382 PacketID pident;
383 pident.sock=sock;
384 pident.outMSG=data;
3ddb9247 385
bb4bdbaf 386 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 387 string packet;
5c633640 388
5b0ddd18 389 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 390
9170fbaf 391 if(!ret || ret==-1) { // timeout
bb4bdbaf 392 t_fdm->removeWriteFD(sock->getHandle());
5c633640 393 }
50c81227
BH
394 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
395 return -1;
396 }
9170fbaf 397 return ret;
5c633640
BH
398}
399
d187038c 400static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 401
9170fbaf 402// -1 is error, 0 is timeout, 1 is success
a683e8bd 403int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 404{
50c81227 405 data.clear();
5c633640
BH
406 PacketID pident;
407 pident.sock=sock;
408 pident.inNeeded=len;
825fa717 409 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 410 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 411
bb4bdbaf 412 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 413 if(!ret || ret==-1) { // timeout
bb4bdbaf 414 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 415 }
50c81227
BH
416 else if(data.empty()) {// error, EOF or other
417 return -1;
418 }
419
9170fbaf 420 return ret;
288f4aa9
BH
421}
422
d187038c 423static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 424{
fba1e944 425 PacketID pident=*any_cast<PacketID>(&var);
4465e941 426 char resp[512];
7c77ce63
RG
427 ComboAddress fromaddr;
428 socklen_t addrlen=sizeof(fromaddr);
429
430 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
431 if (fromaddr != pident.remote) {
e6a9dde5 432 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
433
434 }
435
4465e941 436 t_fdm->removeReadFD(fd);
437 if(ret >= 0) {
a683e8bd 438 string data(resp, (size_t) ret);
fba1e944 439 MT->sendEvent(pident, &data);
4465e941 440 }
441 else {
fba1e944 442 string empty;
443 MT->sendEvent(pident, &empty);
444 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 445 }
446}
fba1e944 447string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 448{
4465e941 449 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
450 s.setNonBlocking();
451 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
452
453 s.bind(local);
454 s.connect(dest);
4465e941 455 s.send(query);
456
457 PacketID pident;
458 pident.sock=&s;
7c77ce63 459 pident.remote=dest;
4465e941 460 pident.type=0;
fba1e944 461 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 462
463 string data;
fba1e944 464
4465e941 465 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 466
4465e941 467 if(!ret || ret==-1) { // timeout
4465e941 468 t_fdm->removeReadFD(s.getHandle());
469 }
470 else if(data.empty()) {// error, EOF or other
fba1e944 471 // we could special case this
4465e941 472 return data;
473 }
4465e941 474 return data;
475}
476
d7dae798 477//! pick a random query local address
1652a63e 478ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 479{
1652a63e 480 ComboAddress ret;
5a38281c 481 if(family==AF_INET) {
3ddb9247 482 if(g_localQueryAddresses4.empty())
1652a63e 483 ret = g_local4;
3ddb9247 484 else
1652a63e
BH
485 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
486 ret.sin4.sin_port = htons(port);
5a38281c
BH
487 }
488 else {
489 if(g_localQueryAddresses6.empty())
1652a63e
BH
490 ret = g_local6;
491 else
492 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 493
1652a63e 494 ret.sin6.sin6_port = htons(port);
5a38281c 495 }
1652a63e 496 return ret;
5a38281c 497}
4ef015cd 498
d187038c 499static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 500
d187038c 501static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
502{
503 uint32_t psize=0;
504 socklen_t len=sizeof(psize);
3ddb9247 505
d7dae798 506 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 507 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 508 return;
d7dae798
BH
509 }
510
511 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 512 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
513}
514
515
516static void setSocketReceiveBuffer(int fd, uint32_t size)
517{
518 setSocketBuffer(fd, SO_RCVBUF, size);
519}
520
521static void setSocketSendBuffer(int fd, uint32_t size)
522{
523 setSocketBuffer(fd, SO_SNDBUF, size);
524}
525
526
4ef015cd
BH
527// you can ask this class for a UDP socket to send a query from
528// this socket is not yours, don't even think about deleting it
529// but after you call 'returnSocket' on it, don't assume anything anymore
530class UDPClientSocks
531{
4ef015cd 532 unsigned int d_numsocks;
4ef015cd 533public:
e2642526 534 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
535 {
536 }
537
996c89cc 538 typedef set<int> socks_t;
4ef015cd
BH
539 socks_t d_socks;
540
2ee280cf 541 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 542 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 543 {
d8f6d49f
BH
544 *fd=makeClientSocket(toaddr.sin4.sin_family);
545 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 546 return -2;
d8f6d49f
BH
547
548 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
549 int err = errno;
41ff43f8 550 // returnSocket(*fd);
a7b68ae7
RG
551 try {
552 closesocket(*fd);
553 }
554 catch(const PDNSException& e) {
e6a9dde5 555 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
556 }
557
d8f6d49f 558 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 559 return -2;
998a4334 560 return -1;
d8f6d49f 561 }
998a4334 562
d8f6d49f 563 d_socks.insert(*fd);
998a4334 564 d_numsocks++;
d8f6d49f 565 return 0;
4ef015cd
BH
566 }
567
095c3045
BH
568 void returnSocket(int fd)
569 {
570 socks_t::iterator i=d_socks.find(fd);
34801ab1 571 if(i==d_socks.end()) {
335da0ba 572 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 573 }
bb4bdbaf 574 returnSocketLocked(i);
095c3045
BH
575 }
576
4ef015cd 577 // return a socket to the pool, or simply erase it
bb4bdbaf 578 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 579 {
600fc20b 580 if(i==d_socks.end()) {
3f81d239 581 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 582 }
80baf329 583 try {
bb4bdbaf 584 t_fdm->removeReadFD(*i);
80baf329
BH
585 }
586 catch(FDMultiplexerException& e) {
bb4bdbaf 587 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 588 }
a7b68ae7
RG
589 try {
590 closesocket(*i);
591 }
592 catch(const PDNSException& e) {
e6a9dde5 593 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 594 }
3ddb9247 595
998a4334
BH
596 d_socks.erase(i++);
597 --d_numsocks;
4ef015cd 598 }
d8f6d49f
BH
599
600 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 601 static int makeClientSocket(int family)
d8f6d49f 602 {
a683e8bd 603 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 604
d8f6d49f
BH
605 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
606 return ret;
3ddb9247
PD
607
608 if(ret<0)
335da0ba 609 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 610
7eb73ffa 611 // setCloseOnExec(ret); // we're not going to exec
5a38281c 612
d8f6d49f 613 int tries=10;
3aa91c3e 614 ComboAddress sin;
d8f6d49f 615 while(--tries) {
1652a63e 616 uint16_t port;
3ddb9247 617
d8f6d49f 618 if(tries==1) // fall back to kernel 'random'
4957a608 619 port = 0;
bf6f28ca
CHB
620 else {
621 do {
622 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
623 }
624 while (s_avoidUdpSourcePorts.count(port));
625 }
5a38281c 626
3aa91c3e 627 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 628
3ddb9247 629 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 630 break;
d8f6d49f
BH
631 }
632 if(!tries)
3aa91c3e 633 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 634
29bb743c 635 setReceiveSocketErrors(ret, family);
3897b9e1 636 setNonBlocking(ret);
d8f6d49f
BH
637 return ret;
638 }
49a699c4
BH
639};
640
f26bf547 641static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 642
288f4aa9 643/* these two functions are used by LWRes */
34801ab1 644// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 645int asendto(const char *data, size_t len, int flags,
3ddb9247 646 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 647{
34801ab1
BH
648
649 PacketID pident;
787e5eab
BH
650 pident.domain = domain;
651 pident.remote = toaddr;
652 pident.type = qtype;
34801ab1
BH
653
654 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
655 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
656
657 for(; chain.first != chain.second; chain.first++) {
658 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 659 /*
4665c31e
BH
660 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
661 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 662 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 663 */
34801ab1
BH
664 chain.first->key.chain.insert(id); // we can chain
665 *fd=-1; // gets used in waitEvent / sendEvent later on
666 return 1;
667 }
668 }
669
49a699c4 670 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
671 if(ret < 0)
672 return ret;
34801ab1 673
998a4334
BH
674 pident.fd=*fd;
675 pident.id=id;
3ddb9247 676
bb4bdbaf
BH
677 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
678 ret = send(*fd, data, len, 0);
679
5b0ddd18 680 int tmp = errno;
bb4bdbaf 681
7302ed0a 682 if(ret < 0)
49a699c4 683 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 684
5b0ddd18 685 errno = tmp; // this is for logging purposes only
7302ed0a 686 return ret;
288f4aa9
BH
687}
688
9170fbaf 689// -1 is error, 0 is timeout, 1 is success
f128d20d 690int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 691 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 692{
0d5f0a9f 693 static optional<unsigned int> nearMissLimit;
3ddb9247 694 if(!nearMissLimit)
0d5f0a9f
BH
695 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
696
288f4aa9 697 PacketID pident;
4ef015cd 698 pident.fd=fd;
288f4aa9 699 pident.id=id;
0d5f0a9f 700 pident.domain=domain;
787e5eab 701 pident.type = qtype;
996c89cc 702 pident.remote=fromaddr;
b636533b 703
5b0ddd18 704 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 705
9170fbaf 706 if(ret > 0) {
996c89cc 707 if(packet.empty()) // means "error"
3ddb9247 708 return -1;
998a4334 709
a683e8bd 710 *d_len=packet.size();
f128d20d 711
0d5f0a9f 712 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 713 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 714 g_stats.spoofCount++;
35ce8576
BH
715 return -1;
716 }
288f4aa9 717 }
09e6702a 718 else {
34801ab1 719 if(fd >= 0)
49a699c4 720 t_udpclientsocks->returnSocket(fd);
09e6702a 721 }
9170fbaf 722 return ret;
288f4aa9
BH
723}
724
88def049
BH
725static void writePid(void)
726{
191f2e47 727 if(!::arg().mustDo("write-pid"))
728 return;
18e7758c 729 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 730 if(of)
705f31ae 731 of<< Utility::getpid() <<endl;
88def049 732 else
e6a9dde5 733 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
734}
735
2749c3fe 736TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
737{
738 ++s_currentConnections;
cd989c87 739 (*t_tcpClientCounts)[d_remote]++;
0e408828 740}
cd989c87
BH
741
742TCPConnection::~TCPConnection()
0e408828 743{
a7b68ae7
RG
744 try {
745 if(closesocket(d_fd) < 0)
e6a9dde5 746 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
747 }
748 catch(const PDNSException& e) {
e6a9dde5 749 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
750 }
751
3ddb9247 752 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 753 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 754 --s_currentConnections;
0e408828 755}
0e9d9ce2 756
3ddb9247 757AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
758
759static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 760
92011b8f 761// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 762static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 763{
92011b8f 764 if(packetsize > 1000 && t_largeanswerremotes)
765 t_largeanswerremotes->push_back(remote);
2cc13433
BH
766 switch(res) {
767 case RCode::ServFail:
92011b8f 768 if(t_servfailremotes) {
769 t_servfailremotes->push_back(remote);
5af86fdc 770 if(query && t_servfailqueryring) // packet cache
92011b8f 771 t_servfailqueryring->push_back(make_pair(*query, qtype));
772 }
2cc13433
BH
773 g_stats.servFails++;
774 break;
775 case RCode::NXDomain:
776 g_stats.nxDomains++;
777 break;
778 case RCode::NoError:
779 g_stats.noErrors++;
780 break;
781 }
782}
783
9a864da4 784static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 785try
786{
5cc8371b 787 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 788}
789catch(...)
790{
791 return "Exception making error message for exception";
792}
793
aa7929a3 794#ifdef HAVE_PROTOBUF
b773359c 795static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 796{
b773359c
RG
797 if (!t_protobufServers) {
798 return;
799 }
800
e1c8a4bb
RG
801 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
802 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
803 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 804 message.setServerIdentity(SyncRes::s_serverID);
a94bc5d7 805 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 806 message.setRequestorId(requestorId);
590388d2 807 message.setDeviceId(deviceId);
02b47f43 808
02b47f43 809 if (!policyTags.empty()) {
d9d3f9c1 810 message.setPolicyTags(policyTags);
02b47f43 811 }
aa7929a3 812
d9d3f9c1 813// cerr <<message.toDebugString()<<endl;
aa7929a3 814 std::string str;
d9d3f9c1 815 message.serialize(str);
b773359c
RG
816
817 for (auto& server : *t_protobufServers) {
818 server->queueData(str);
819 }
aa7929a3
RG
820}
821
b773359c 822static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 823{
b773359c
RG
824 if (!t_protobufServers) {
825 return;
826 }
827
d9d3f9c1 828// cerr <<message.toDebugString()<<endl;
aa7929a3 829 std::string str;
d9d3f9c1 830 message.serialize(str);
b773359c
RG
831
832 for (auto& server : *t_protobufServers) {
833 server->queueData(str);
834 }
aa7929a3
RG
835}
836#endif
837
53508135
PL
838/**
839 * Chases the CNAME provided by the PolicyCustom RPZ policy.
840 *
841 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
842 * @param qtype: The QType of the original query
843 * @param sr: A SyncRes
844 * @param res: An integer that will contain the RCODE of the lookup we do
845 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
846 */
d187038c 847static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
848{
849 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
850 bool oldWantsRPZ = sr.getWantsRPZ();
851 sr.setWantsRPZ(false);
53508135 852 vector<DNSRecord> ans;
6da513b2 853 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
854 for (const auto& rec : ans) {
855 if(rec.d_place == DNSResourceRecord::ANSWER) {
856 ret.push_back(rec);
857 }
858 }
859 // Reset the RPZ state of the SyncRes
30ee601a 860 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
861 }
862}
863
70fb28d9 864static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 865{
70fb28d9 866 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
867
868 if(rec.d_type != QType::OPT) // their TTL ain't real
869 minTTL = min(minTTL, rec.d_ttl);
870
871 rec.d_content->toPacket(pw);
872 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
873 pw.rollback();
874 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
875 pw.getHeader()->tc=1;
876 pw.truncate();
877 }
878 return false;
879 }
880
881 return true;
882}
883
63341e8d 884#ifdef HAVE_PROTOBUF
3fe06137 885static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 886{
3fe06137 887 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
888
889 for (const auto& server : config.servers) {
890 try {
da71b63b 891 result->emplace_back(new RemoteLogger(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
b773359c
RG
892 }
893 catch(const std::exception& e) {
894 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
895 }
896 catch(const PDNSException& e) {
897 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
898 }
63341e8d
RG
899 }
900
901 return result;
902}
903
904static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
905{
906 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
907 if (t_protobufServers) {
908 for (auto& server : *t_protobufServers) {
909 server->stop();
910 }
911 t_protobufServers.reset();
63341e8d
RG
912 }
913
914 return false;
915 }
916
917 /* if the server was not running, or if it was running according to a
918 previous configuration */
b773359c
RG
919 if (!t_protobufServers ||
920 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 921
b773359c
RG
922 if (t_protobufServers) {
923 for (auto& server : *t_protobufServers) {
924 server->stop();
925 }
63341e8d 926 }
b773359c 927 t_protobufServers.reset();
63341e8d 928
b773359c
RG
929 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
930 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
931 }
932
933 return true;
934}
935
936static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
937{
938 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
939 if (t_outgoingProtobufServers) {
940 for (auto& server : *t_outgoingProtobufServers) {
941 server->stop();
942 }
63341e8d 943 }
b773359c 944 t_outgoingProtobufServers.reset();
63341e8d
RG
945
946 return false;
947 }
948
949 /* if the server was not running, or if it was running according to a
950 previous configuration */
b773359c
RG
951 if (!t_outgoingProtobufServers ||
952 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 953
b773359c
RG
954 if (t_outgoingProtobufServers) {
955 for (auto& server : *t_outgoingProtobufServers) {
956 server->stop();
957 }
63341e8d 958 }
b773359c 959 t_outgoingProtobufServers.reset();
63341e8d 960
b773359c
RG
961 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
962 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
963 }
964
965 return true;
966}
967#endif /* HAVE_PROTOBUF */
968
af1377b7 969#ifdef NOD_ENABLED
41c542ec 970static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
971{
972 static const QType qt(QType::A);
973 static const uint16_t qc(QClass::IN);
41c542ec 974 bool ret = false;
af1377b7
NC
975 // First check the (sub)domain isn't whitelisted for NOD purposes
976 if (!g_nodDomainWL.check(dname)) {
977 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
978 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
979 if (g_nodLog) {
980 // This should probably log to a dedicated log file
981 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
982 }
983 if (!(g_nodLookupDomain.isRoot())) {
984 // Send a DNS A query to <domain>.g_nodLookupDomain
985 DNSName qname = dname;
986 vector<DNSRecord> dummy;
987 qname += g_nodLookupDomain;
988 directResolve(qname, qt, qc, dummy);
989 }
41c542ec 990 ret = true;
af1377b7
NC
991 }
992 }
41c542ec 993 return ret;
af1377b7
NC
994}
995
996static void nodAddDomain(const DNSName& dname)
997{
998 // Don't bother adding domains on the nod whitelist
999 if (!g_nodDomainWL.check(dname)) {
1000 if (t_nodDBp) {
1001 // This keeps the nod info up to date
1002 t_nodDBp->addDomain(dname);
1003 }
1004 }
1005}
41c542ec
NC
1006
1007static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1008{
1009 bool ret = false;
1010 if (record.d_place == DNSResourceRecord::ANSWER ||
1011 record.d_place == DNSResourceRecord::ADDITIONAL) {
1012 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1013 std::stringstream ss;
1014 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1015 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1016 if (g_udrLog) {
1017 // This should also probably log to a dedicated file.
1018 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1019 }
1020 ret = true;
1021 }
1022 }
1023 return ret;
1024}
af1377b7
NC
1025#endif /* NOD_ENABLED */
1026
d187038c 1027static void startDoResolve(void *p)
288f4aa9 1028{
9a864da4 1029 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1030 try {
5af86fdc
RG
1031 if (t_queryring)
1032 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1033
32015748 1034 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1035 EDNSOpts edo;
5164bac3 1036 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1037 bool variableAnswer = dc->d_variable;
8e079f3a 1038 bool haveEDNS=false;
ca2526f5
NC
1039#ifdef NOD_ENABLED
1040 bool hasUDR = false;
1041#endif /* NOD_ENABLED */
f1db0de2
PL
1042 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1043 uint8_t ednsExtRCode = 0;
8e079f3a 1044 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1045 haveEDNS=true;
1046 if (edo.d_version != 0) {
1047 ednsExtRCode = ERCode::BADVERS;
1048 }
1049
32015748
RG
1050 if(!dc->d_tcp) {
1051 /* rfc6891 6.2.3:
1052 "Values lower than 512 MUST be treated as equal to 512."
1053 */
1054 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1055 }
5164bac3 1056 ednsOpts = edo.d_options;
3af35968 1057 maxanswersize -= 11; // EDNS header size
b40562da 1058
1f691b94
PL
1059 for (const auto& o : edo.d_options) {
1060 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1061 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1062 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1063 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1064 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1065 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1066 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1067 variableAnswer = true; // Can't packetcache an answer with NSID
1068 // Option Code and Option Length are both 2
1069 maxanswersize -= 2 + 2 + mode_server_id.size();
1070 }
b40562da
RG
1071 }
1072 }
10321a98 1073 }
b40562da
RG
1074 /* perhaps there was no EDNS or no ECS but by now we looked */
1075 dc->d_ecsParsed = true;
e325f20c 1076 vector<DNSRecord> ret;
ea634573 1077 vector<uint8_t> packet;
b23b8614 1078
ad42489c 1079 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1080 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1081 bool wantsRPZ(true);
1fbc6dc5 1082 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1083 bool logResponse = false;
aa7929a3 1084#ifdef HAVE_PROTOBUF
63341e8d 1085 if (checkProtobufExport(luaconfsLocal)) {
b773359c 1086 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1087 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1088 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
0bd2e252 1089 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1090 pbMessage->setServerIdentity(SyncRes::s_serverID);
d362f7c1 1091 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1092 }
1093#endif /* HAVE_PROTOBUF */
ad42489c 1094
3ddb9247 1095 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1096
1097 pw.getHeader()->aa=0;
1098 pw.getHeader()->ra=1;
c154c8a4 1099 pw.getHeader()->qr=1;
bb4bdbaf 1100 pw.getHeader()->tc=0;
ea634573 1101 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1102 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1103 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1104
70fb28d9
RG
1105 /* This is the lowest TTL seen in the records of the response,
1106 so we can't cache it for longer than this value.
1107 If we have a TTL cap, this value can't be larger than the
1108 cap no matter what. */
1109 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1110
1111 SyncRes sr(dc->d_now);
0c43f455 1112
2e921ec6 1113 bool DNSSECOK=false;
3457a2a0 1114 if(t_pdl) {
f26bf547 1115 sr.setLuaEngine(t_pdl);
3457a2a0 1116 }
9eec8c98 1117 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1118 sr.setDoDNSSEC(true);
9eec8c98
PL
1119
1120 // Does the requestor want DNSSEC records?
d6c335ab 1121 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1122 DNSSECOK=true;
1123 g_stats.dnssecQueries++;
1124 }
88c33dca
RG
1125 if (dc->d_mdp.d_header.cd) {
1126 /* Per rfc6840 section 5.9, "When processing a request with
1127 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1128 to return all response data, even data that has failed DNSSEC
1129 validation. */
1130 ++g_stats.dnssecCheckDisabledQueries;
1131 }
1132 if (dc->d_mdp.d_header.ad) {
1133 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1134 indicating that the requester understands and is interested in the
1135 value of the AD bit in the response. This allows a requester to
1136 indicate that it understands the AD bit without also requesting
1137 DNSSEC data via the DO bit. */
1138 ++g_stats.dnssecAuthenticDataQueries;
1139 }
9eec8c98
PL
1140 } else {
1141 // Ignore the client-set CD flag
1142 pw.getHeader()->cd=0;
5b9853c9 1143 }
0c43f455
RG
1144 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1145
4898a348 1146#ifdef HAVE_PROTOBUF
30ee601a 1147 sr.setInitialRequestId(dc->d_uuid);
b773359c 1148 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1149#endif
0c43f455 1150
2fe3354d 1151 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1152
904d3219 1153 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1154 bool shouldNotValidate = false;
904d3219 1155
ef3b6cd7
RG
1156 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1157 int res = RCode::NoError;
1f1ca368 1158 DNSFilterEngine::Policy appliedPolicy;
6da513b2 1159 std::vector<DNSRecord> spoofed;
f1c7929a 1160 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1161 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1162 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1163 dq.tag = dc->d_tag;
1164 dq.discardedPolicies = &sr.d_discardedPolicies;
1165 dq.policyTags = &dc->d_policyTags;
1166 dq.appliedPolicy = &appliedPolicy;
1167 dq.currentRecords = &ret;
1168 dq.dh = &dc->d_mdp.d_header;
05c74122 1169 dq.data = dc->d_data;
67e31ebe
RG
1170#ifdef HAVE_PROTOBUF
1171 dq.requestorId = dc->d_requestorId;
590388d2 1172 dq.deviceId = dc->d_deviceId;
67e31ebe 1173#endif
ba21fcfe 1174
6cf96227
PL
1175 if(ednsExtRCode != 0) {
1176 goto sendit;
1177 }
1178
e661a20b 1179 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1180 pw.getHeader()->tc = 1;
1181 res = 0;
1182 variableAnswer = true;
e661a20b
PD
1183 goto sendit;
1184 }
1185
f26bf547 1186 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1187 sr.setLogMode(SyncRes::Store);
1188 tracedQuery=true;
1189 }
3ddb9247 1190
8f7473d7 1191
976ec823 1192 if(!g_quiet || tracedQuery) {
e6a9dde5 1193 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1194 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1195 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1196 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1197 }
e6a9dde5 1198 g_log<<endl;
976ec823 1199 }
c75a6a9e 1200
fededf47 1201 sr.setId(MT->getTid());
67828389 1202 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1203 sr.setCacheOnly();
1204
f26bf547
RG
1205 if (t_pdl) {
1206 t_pdl->prerpz(dq, res);
0a273054
RG
1207 }
1208
db486de5 1209 // Check if the query has a policy attached to it
0a273054 1210 if (wantsRPZ) {
5cc8371b 1211 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1212 }
644dd1da 1213
54be222b 1214 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1215 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1216
30ee601a 1217 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1218 if(wantsRPZ) {
1219 switch(appliedPolicy.d_kind) {
1220 case DNSFilterEngine::PolicyKind::NoAction:
1221 break;
1222 case DNSFilterEngine::PolicyKind::Drop:
1223 g_stats.policyDrops++;
7a25883a 1224 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1225 return;
1226 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1227 g_stats.policyResults[appliedPolicy.d_kind]++;
1228 res=RCode::NXDomain;
1229 goto haveAnswer;
1230 case DNSFilterEngine::PolicyKind::NODATA:
1231 g_stats.policyResults[appliedPolicy.d_kind]++;
1232 res=RCode::NoError;
db486de5 1233 goto haveAnswer;
b8470add
PL
1234 case DNSFilterEngine::PolicyKind::Custom:
1235 g_stats.policyResults[appliedPolicy.d_kind]++;
1236 res=RCode::NoError;
6da513b2
RG
1237 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1238 for (const auto& dr : spoofed) {
1239 ret.push_back(dr);
1240 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1241 }
b8470add
PL
1242 goto haveAnswer;
1243 case DNSFilterEngine::PolicyKind::Truncate:
1244 if(!dc->d_tcp) {
1245 g_stats.policyResults[appliedPolicy.d_kind]++;
1246 res=RCode::NoError;
1247 pw.getHeader()->tc=1;
1248 goto haveAnswer;
1249 }
1250 break;
1251 }
db486de5
PL
1252 }
1253
b8470add 1254 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1255 try {
1256 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1257 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1258 }
1259 catch(ImmediateServFailException &e) {
854d44e3 1260 if(g_logCommonErrors)
e6a9dde5 1261 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1262 res = RCode::ServFail;
1263 }
4485aa35 1264
1921a4c2
RG
1265 dq.validationState = sr.getValidationState();
1266
b8470add
PL
1267 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1268 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1269 appliedPolicy = sr.d_appliedPolicy;
1270 g_stats.policyResults[appliedPolicy.d_kind]++;
1271 switch(appliedPolicy.d_kind) {
1272 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1273 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1274 case DNSFilterEngine::PolicyKind::Drop:
1275 g_stats.policyDrops++;
b8470add
PL
1276 return;
1277 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1278 ret.clear();
1279 res=RCode::NXDomain;
1280 goto haveAnswer;
1281
1282 case DNSFilterEngine::PolicyKind::NODATA:
1283 ret.clear();
1284 res=RCode::NoError;
1285 goto haveAnswer;
1286
1287 case DNSFilterEngine::PolicyKind::Truncate:
1288 if(!dc->d_tcp) {
1289 ret.clear();
1290 res=RCode::NoError;
1291 pw.getHeader()->tc=1;
1292 goto haveAnswer;
1293 }
1294 break;
1295
1296 case DNSFilterEngine::PolicyKind::Custom:
1297 ret.clear();
1298 res=RCode::NoError;
6da513b2
RG
1299 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1300 for (const auto& dr : spoofed) {
1301 ret.push_back(dr);
1302 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1303 }
b8470add
PL
1304 goto haveAnswer;
1305 }
1306 }
1307
1308 if (wantsRPZ) {
1f1ca368 1309 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1310 }
db486de5 1311
f26bf547 1312 if(t_pdl) {
db486de5
PL
1313 if(res == RCode::NoError) {
1314 auto i=ret.cbegin();
1315 for(; i!= ret.cend(); ++i)
1316 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1317 break;
f26bf547 1318 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1319 shouldNotValidate = true;
1320
db486de5 1321 }
f26bf547 1322 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1323 shouldNotValidate = true;
db486de5 1324
f26bf547 1325 if(t_pdl->postresolve(dq, res))
3ca4e735 1326 shouldNotValidate = true;
db486de5
PL
1327 }
1328
b8470add
PL
1329 if (wantsRPZ) { //XXX This block is repeated, see above
1330 g_stats.policyResults[appliedPolicy.d_kind]++;
1331 switch(appliedPolicy.d_kind) {
1332 case DNSFilterEngine::PolicyKind::NoAction:
1333 break;
1334 case DNSFilterEngine::PolicyKind::Drop:
1335 g_stats.policyDrops++;
b8470add
PL
1336 return;
1337 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1338 ret.clear();
1339 res=RCode::NXDomain;
1340 goto haveAnswer;
1341
1342 case DNSFilterEngine::PolicyKind::NODATA:
1343 ret.clear();
1344 res=RCode::NoError;
1345 goto haveAnswer;
1346
1347 case DNSFilterEngine::PolicyKind::Truncate:
1348 if(!dc->d_tcp) {
1349 ret.clear();
1350 res=RCode::NoError;
1351 pw.getHeader()->tc=1;
1352 goto haveAnswer;
1353 }
1354 break;
1355
1356 case DNSFilterEngine::PolicyKind::Custom:
1357 ret.clear();
1358 res=RCode::NoError;
6da513b2
RG
1359 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1360 for (const auto& dr : spoofed) {
1361 ret.push_back(dr);
1362 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1363 }
b8470add
PL
1364 goto haveAnswer;
1365 }
644dd1da 1366 }
4485aa35 1367 }
644dd1da 1368 haveAnswer:;
3e8216c8 1369 if(res == PolicyDecision::DROP) {
e9c2ad3a 1370 g_stats.policyDrops++;
ae7e77ad 1371 return;
3ddb9247 1372 }
9cdfab64 1373 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1374 {
85ffbc53
PD
1375 string trace(sr.getTrace());
1376 if(!trace.empty()) {
1377 vector<string> lines;
1378 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1379 for(const string& line : lines) {
85ffbc53 1380 if(!line.empty())
e6a9dde5 1381 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1382 }
1383 }
1384 }
3ddb9247 1385
9cdfab64 1386 if(res == -1) {
0fe1d080
PD
1387 pw.getHeader()->rcode=RCode::ServFail;
1388 // no commit here, because no record
1389 g_stats.servFails++;
1390 }
288f4aa9 1391 else {
ea634573 1392 pw.getHeader()->rcode=res;
92011b8f 1393
f3fe4ae6 1394 // Does the validation mode or query demand validation?
0c43f455 1395 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1396 try {
f3fe4ae6 1397 if(sr.doLog()) {
e6a9dde5 1398 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1399 }
4d2be65d
RG
1400
1401 auto state = sr.getValidationState();
1402
b25cae9a 1403 if(state == Secure) {
2e921ec6 1404 if(sr.doLog()) {
e6a9dde5 1405 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1406 }
b25cae9a 1407
1408 // Is the query source interested in the value of the ad-bit?
885c8881 1409 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1410 pw.getHeader()->ad=1;
1411 }
1412 else if(state == Insecure) {
f3fe4ae6 1413 if(sr.doLog()) {
e6a9dde5 1414 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1415 }
b25cae9a 1416
1417 pw.getHeader()->ad=0;
f3fe4ae6 1418 }
b25cae9a 1419 else if(state == Bogus) {
66f2e6ad
KM
1420 if(t_bogusremotes)
1421 t_bogusremotes->push_back(dc->d_source);
1422 if(t_bogusqueryring)
1423 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1424 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1425 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1426 }
1427
1428 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1429 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1430 if(sr.doLog()) {
e6a9dde5 1431 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1432 }
1433
1434 pw.getHeader()->rcode=RCode::ServFail;
1435 goto sendit;
1436 } else {
1437 if(sr.doLog()) {
e6a9dde5 1438 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1439 }
1440 }
1441 }
1442 }
1443 catch(ImmediateServFailException &e) {
1444 if(g_logCommonErrors)
e6a9dde5 1445 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1446 pw.getHeader()->rcode=RCode::ServFail;
1447 goto sendit;
f3fe4ae6 1448 }
b3f0ed10 1449 }
1450
c154c8a4 1451 if(ret.size()) {
92476c8b 1452 orderAndShuffle(ret);
5cc8371b 1453 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1454 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1455 variableAnswer=true;
1456 }
8e079f3a 1457 }
0afa32d4
RG
1458
1459 bool needCommit = false;
8e079f3a 1460 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1461 if( ! DNSSECOK &&
1462 ( i->d_type == QType::NSEC3 ||
1463 (
1464 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1465 (
1466 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1467 i->d_place != DNSResourceRecord::ANSWER
1468 )
1469 )
1470 )
1471 ) {
2e921ec6 1472 continue;
3e80ebce
KM
1473 }
1474
70fb28d9 1475 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1476 needCommit = false;
1477 break;
1478 }
1479 needCommit = true;
1480
41c542ec
NC
1481#ifdef NOD_ENABLED
1482 bool udr = false;
1483 if (g_udrEnabled) {
1484 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1485 if (!hasUDR && udr)
1486 hasUDR = true;
41c542ec
NC
1487 }
1488#endif /* NOD ENABLED */
1489
aa7929a3 1490#ifdef HAVE_PROTOBUF
b773359c 1491 if (t_protobufServers) {
41c542ec
NC
1492#ifdef NOD_ENABLED
1493 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1494#else
0bd2e252 1495 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1496#endif /* NOD_ENABLED */
aa7929a3
RG
1497 }
1498#endif
ea634573 1499 }
0afa32d4 1500 if(needCommit)
8e079f3a 1501 pw.commit();
288f4aa9 1502 }
10321a98 1503 sendit:;
b3f0ed10 1504
a0ddd130 1505 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
9837850d 1506 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
5a7f99b4 1507 EDNSSubnetOpts eo;
1508 eo.source = dc->d_ednssubnet.source;
1509 ComboAddress sa;
1ef18cab 1510 sa.reset();
5a7f99b4 1511 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1512 eo.scope = Netmask(sa, 0);
1513
1514 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1515 }
1516
97c6d7e5
RG
1517 if (haveEDNS) {
1518 /* we try to add the EDNS OPT RR even for truncated answers,
1519 as rfc6891 states:
1520 "The minimal response MUST be the DNS header, question section, and an
1521 OPT record. This MUST also occur when a truncated response (using
1522 the DNS header's TC bit) is returned."
1523 */
9b60fb71 1524 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1525 pw.commit();
97c6d7e5
RG
1526 }
1527
79332bff 1528 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1529 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1530#ifdef NOD_ENABLED
1531 bool nod = false;
1532 if (g_nodEnabled) {
1533 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1534 nod = true;
1535 }
1536#endif /* NOD_ENABLED */
aa7929a3 1537#ifdef HAVE_PROTOBUF
b773359c 1538 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1539 pbMessage->setBytes(packet.size());
1540 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1541 if (appliedPolicy.d_name) {
d362f7c1
RG
1542 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1543 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1544 }
d362f7c1 1545 pbMessage->setPolicyTags(dc->d_policyTags);
c29d820c
RG
1546 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1547 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1548 }
1549 else {
1550 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1551 }
d362f7c1
RG
1552 pbMessage->setRequestorId(dq.requestorId);
1553 pbMessage->setDeviceId(dq.deviceId);
41c542ec
NC
1554#ifdef NOD_ENABLED
1555 if (g_nodEnabled) {
ca2526f5 1556 if (nod) {
41c542ec 1557 pbMessage->setNOD(true);
ca2526f5
NC
1558 pbMessage->addPolicyTag(g_nod_pbtag);
1559 }
1560 if (hasUDR) {
1561 pbMessage->addPolicyTag(g_udr_pbtag);
1562 }
41c542ec
NC
1563 }
1564#endif /* NOD_ENABLED */
b773359c 1565 protobufLogResponse(*pbMessage);
ac238ea7 1566#ifdef NOD_ENABLED
ca2526f5
NC
1567 if (g_nodEnabled) {
1568 pbMessage->setNOD(false);
1569 pbMessage->clearUDR();
1570 if (nod)
1571 pbMessage->removePolicyTag(g_nod_pbtag);
1572 if (hasUDR)
1573 pbMessage->removePolicyTag(g_udr_pbtag);
1574 }
ac238ea7 1575#endif /* NOD_ENABLED */
aa7929a3
RG
1576 }
1577#endif
ea634573 1578 if(!dc->d_tcp) {
b71b60ee 1579 struct msghdr msgh;
1580 struct iovec iov;
1581 char cbuf[256];
1582 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1583 msgh.msg_control=NULL;
1584
cbc03320 1585 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1586 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1587 }
cbc03320 1588 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1589 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1590
49dc532e 1591 if(variableAnswer || sr.wasVariable()) {
1ef18cab 1592 g_stats.variableResponses++;
49dc532e 1593 }
3762e821 1594 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1595 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1596 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1597 g_now.tv_sec,
76e2b9e3 1598 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1599 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1600 dq.validationState,
08b02366
RG
1601 dc->d_ecsBegin,
1602 dc->d_ecsEnd,
4b0bdd5f 1603 std::move(pbMessage));
1051f8a9 1604 }
3762e821 1605 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1606 }
9c495589
BH
1607 else {
1608 char buf[2];
ea634573
BH
1609 buf[0]=packet.size()/256;
1610 buf[1]=packet.size()%256;
feccc9fc 1611
c038218b 1612 Utility::iovec iov[2];
feccc9fc 1613
ea634573
BH
1614 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1615 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1616
dd079764 1617 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1618 bool hadError=true;
feccc9fc 1619
dd079764 1620 if(wret == 0)
e6a9dde5 1621 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1622 else if(wret < 0 )
e6a9dde5 1623 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1624 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1625 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1626 else
18af64a8 1627 hadError=false;
3ddb9247 1628
09e6702a 1629 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1630
09e6702a 1631 if(hadError) {
18af64a8 1632 // no need to remove us from FDM, we weren't there
c36bc97a 1633 dc->d_socket = -1;
09e6702a 1634 }
a6ae6414 1635 else {
fde296a3
RG
1636 dc->d_tcpConnection->queriesCount++;
1637 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1638 dc->d_socket = -1;
1639 }
1640 else {
1641 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1642 Utility::gettimeofday(&g_now, 0); // needs to be updated
27ae2e3c
RG
1643 struct timeval ttd = g_now;
1644 ttd.tv_sec += g_tcpTimeout;
1645
1646 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
fde296a3 1647 }
0e9d9ce2 1648 }
9c495589 1649 }
2c9119cd 1650 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1651 if(!g_quiet) {
e6a9dde5
PL
1652 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1653 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1654 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1655 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1656
1657 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1658 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1659 }
1660
e6a9dde5 1661 g_log<<endl;
2c9119cd 1662
c75a6a9e 1663 }
b23b8614 1664
f7b8cffa
RG
1665 if (sr.d_outqueries || sr.d_authzonequeries) {
1666 t_RC->cacheMisses++;
1667 }
1668 else {
1669 t_RC->cacheHits++;
1670 }
2c9119cd 1671
fe213470
BH
1672 if(spent < 0.001)
1673 g_stats.answers0_1++;
1674 else if(spent < 0.010)
1675 g_stats.answers1_10++;
1676 else if(spent < 0.1)
1677 g_stats.answers10_100++;
1678 else if(spent < 1.0)
1679 g_stats.answers100_1000++;
1680 else
1681 g_stats.answersSlow++;
1682
574af7ea 1683 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1684 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1685 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1686 // no worries, we do this for packet cache hits elsewhere
19178da9 1687
1688 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1689 if(ourtime < 1)
1690 g_stats.ourtime0_1++;
1691 else if(ourtime < 2)
1692 g_stats.ourtime1_2++;
1693 else if(ourtime < 4)
1694 g_stats.ourtime2_4++;
1695 else if(ourtime < 8)
1696 g_stats.ourtime4_8++;
1697 else if(ourtime < 16)
1698 g_stats.ourtime8_16++;
1699 else if(ourtime < 32)
1700 g_stats.ourtime16_32++;
1701 else {
1702 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1703 g_stats.ourtimeSlow++;
1704 }
042da1a1 1705 if(ourtime >= 0.0) {
1706 newLat=ourtime*1000; // usec
1707 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1708 }
c6d04bdc 1709 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1710 }
3f81d239 1711 catch(PDNSException &ae) {
e6a9dde5 1712 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1713 }
16ce7f18
JS
1714 catch(const MOADNSException &mde) {
1715 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1716 }
fdbf35ac 1717 catch(std::exception& e) {
e6a9dde5 1718 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1719
1720 // Luawrapper nests the exception from Lua, so we unnest it here
1721 try {
1722 std::rethrow_if_nested(e);
2010ac95 1723 } catch(const std::exception& ne) {
e6a9dde5 1724 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1725 } catch(...) {}
1726
e6a9dde5 1727 g_log<<endl;
c154c8a4 1728 }
288f4aa9 1729 catch(...) {
e6a9dde5 1730 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1731 }
3ddb9247 1732
ec6eacbc 1733 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1734}
1735
d187038c 1736static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1737{
2d733c0f 1738 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1739 if(processNum >= 0)
335da0ba 1740 sockname += "."+std::to_string(processNum);
677e2a46 1741 sockname+=".controlsocket";
41f7a068 1742 s_rcc.listen(sockname);
3ddb9247 1743
387de317
BH
1744 int sockowner = -1;
1745 int sockgroup = -1;
1746
1747 if (!::arg().isEmpty("socket-group"))
1748 sockgroup=::arg().asGid("socket-group");
1749 if (!::arg().isEmpty("socket-owner"))
1750 sockowner=::arg().asUid("socket-owner");
3ddb9247 1751
f838ad8d
BH
1752 if (sockgroup > -1 || sockowner > -1) {
1753 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1754 unixDie("Failed to chown control socket");
1755 }
1756 }
387de317
BH
1757
1758 // do mode change if socket-mode is given
1759 if(!::arg().isEmpty("socket-mode")) {
1760 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1761 if(chmod(sockname.c_str(), sockmode) < 0) {
1762 unixDie("Failed to chmod control socket");
1763 }
387de317 1764 }
1d5b3ce6
BH
1765}
1766
5cc8371b 1767static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1768 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1769 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1770{
59cb4a79 1771 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1772 const bool lookForECS = ednssubnet != nullptr;
1773 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1774 size_t questionLen = question.length();
1775 unsigned int consumed=0;
1776 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1777
1778 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1779 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1780 const uint16_t arcount = ntohs(dh->arcount);
1781
1782 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1783 if (question.at(pos) != 0) {
1784 /* not an OPT or a XPF, bye. */
1785 return;
1786 }
1787
1788 pos += 1;
1789 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1790 pos += sizeof(dnsrecordheader);
1791
1792 if (pos >= questionLen) {
1793 return;
1794 }
1795
02b47f43 1796 /* OPT root label (1) followed by type (2) */
5cc8371b 1797 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1798 if (!options) {
1799 char* ecsStart = nullptr;
1800 size_t ecsLen = 0;
5cc8371b
RG
1801 /* we need to pass the record len */
1802 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1803 if (res == 0 && ecsLen > 4) {
1804 EDNSSubnetOpts eso;
1805 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1806 *ednssubnet=eso;
5cc8371b 1807 foundECS = true;
00b8cadc
RG
1808 }
1809 }
1810 }
1811 else {
5cc8371b
RG
1812 /* we need to pass the record len */
1813 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1814 if (res == 0) {
1815 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1816 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1817 EDNSSubnetOpts eso;
29e6303a 1818 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1819 *ednssubnet=eso;
5cc8371b 1820 foundECS = true;
00b8cadc
RG
1821 }
1822 }
02b47f43
RG
1823 }
1824 }
1825 }
59cb4a79 1826 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1827 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1828 return;
1829 }
1830
1831 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1832 }
1833
1834 pos += ntohs(drh->d_clen);
02b47f43
RG
1835 }
1836}
1837
d187038c 1838static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1839{
cd989c87 1840 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1841
879b3f70 1842 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 1843 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1844 if(bytes==1)
667f7e60 1845 conn->state=TCPConnection::BYTE1;
3ddb9247 1846 if(bytes==2) {
a0aa4f64 1847 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1848 conn->data.resize(conn->qlen);
667f7e60
BH
1849 conn->bytesread=0;
1850 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1851 }
1852 if(!bytes || bytes < 0) {
bb4bdbaf 1853 t_fdm->removeReadFD(fd);
09e6702a
BH
1854 return;
1855 }
1856 }
667f7e60 1857 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 1858 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1859 if(bytes==1) {
667f7e60 1860 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1861 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1862 conn->data.resize(conn->qlen);
667f7e60 1863 conn->bytesread=0;
09e6702a
BH
1864 }
1865 if(!bytes || bytes < 0) {
1866 if(g_logCommonErrors)
e6a9dde5 1867 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1868 t_fdm->removeReadFD(fd);
09e6702a
BH
1869 return;
1870 }
1871 }
667f7e60 1872 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 1873 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
f9d67b41 1874 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1875 if(g_logCommonErrors) {
1876 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1877 }
bb4bdbaf 1878 t_fdm->removeReadFD(fd);
09e6702a
BH
1879 return;
1880 }
b841314c 1881 conn->bytesread+=(uint16_t)bytes;
667f7e60 1882 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1883 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1884
9a864da4 1885 std::unique_ptr<DNSComboWriter> dc;
09e6702a 1886 try {
9a864da4 1887 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 1888 }
16ce7f18 1889 catch(const MOADNSException &mde) {
3ddb9247 1890 g_stats.clientParseError++;
4957a608 1891 if(g_logCommonErrors)
e6a9dde5 1892 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1893 return;
09e6702a 1894 }
cd989c87
BH
1895 dc->d_tcpConnection = conn; // carry the torch
1896 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1897 dc->d_tcp=true;
5cc8371b
RG
1898 dc->setRemote(conn->d_remote);
1899 dc->setSource(conn->d_remote);
a6147cd2 1900 ComboAddress dest;
d38e2ba9 1901 dest.reset();
a6147cd2 1902 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1903 socklen_t len = dest.getSocklen();
1904 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1905 dc->setLocal(dest);
5cc8371b 1906 dc->setDestination(dest);
33dcceba
RG
1907 DNSName qname;
1908 uint16_t qtype=0;
1909 uint16_t qclass=0;
1910 bool needECS = false;
5cc8371b 1911 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1912 string requestorId;
590388d2 1913 string deviceId;
16bbc6e3 1914 bool logQuery = false;
aa7929a3 1915#ifdef HAVE_PROTOBUF
02b47f43 1916 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1917 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1918 needECS = true;
1919 }
b773359c 1920 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
33dcceba
RG
1921#endif
1922
70fb28d9 1923 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
1924
1925 try {
29e6303a 1926 EDNSOptionViewMap ednsOptions;
5cc8371b 1927 bool xpfFound = false;
b40562da 1928 dc->d_ecsParsed = true;
5cc8371b 1929 dc->d_ecsFound = false;
2749c3fe 1930 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
1931 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1932 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 1933
70fb28d9 1934 if(t_pdl) {
33dcceba 1935 try {
70fb28d9 1936 if (t_pdl->d_gettag_ffi) {
f1c7929a 1937 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
1938 }
1939 else if (t_pdl->d_gettag) {
1940 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1941 }
33dcceba 1942 }
70fb28d9 1943 catch(const std::exception& e) {
33dcceba 1944 if(g_logCommonErrors)
e6a9dde5 1945 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1946 }
1947 }
1948 }
70fb28d9 1949 catch(const std::exception& e)
33dcceba
RG
1950 {
1951 if(g_logCommonErrors)
e6a9dde5 1952 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1953 }
1954 }
f52177c3
RG
1955
1956 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1957
33dcceba 1958#ifdef HAVE_PROTOBUF
b773359c 1959 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 1960 dc->d_requestorId = requestorId;
590388d2 1961 dc->d_deviceId = deviceId;
d61aa945 1962 dc->d_uuid = getUniqueID();
4898a348 1963 }
02b47f43 1964
b773359c 1965 if(t_protobufServers) {
02b47f43 1966 try {
02b47f43 1967
845cbf4c 1968 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
b773359c 1969 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 1970 }
02b47f43
RG
1971 }
1972 catch(std::exception& e) {
1973 if(g_logCommonErrors)
e6a9dde5 1974 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
1975 }
1976 }
aa7929a3 1977#endif
5034517a
RG
1978 if(t_pdl) {
1979 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
1980 if(!g_quiet)
1981 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
1982 g_stats.policyDrops++;
1983 return;
1984 }
1985 }
1986
879b3f70 1987 if(dc->d_mdp.d_header.qr) {
048f5db6 1988 g_stats.ignoredCount++;
c0f9be19
RG
1989 if(g_logCommonErrors) {
1990 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1991 }
4957a608 1992 return;
879b3f70 1993 }
3abcdab2 1994 if(dc->d_mdp.d_header.opcode) {
048f5db6 1995 g_stats.ignoredCount++;
c0f9be19
RG
1996 if(g_logCommonErrors) {
1997 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1998 }
c0f9be19
RG
1999 return;
2000 }
2001 else if (dh->qdcount == 0) {
2002 g_stats.emptyQueriesCount++;
2003 if(g_logCommonErrors) {
2004 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2005 }
3abcdab2
PD
2006 return;
2007 }
09e6702a 2008 else {
4957a608
BH
2009 ++g_stats.qcounter;
2010 ++g_stats.tcpqcounter;
9a864da4 2011 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
4957a608 2012 return;
09e6702a
BH
2013 }
2014 }
2015 }
2016}
2017
6dcd28c3 2018//! Handle new incoming TCP connection
d187038c 2019static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2020{
37d3f960 2021 ComboAddress addr;
09e6702a 2022 socklen_t addrlen=sizeof(addr);
a683e8bd 2023 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2024 if(newsock>=0) {
85c32340
BH
2025 if(MT->numProcesses() > g_maxMThreads) {
2026 g_stats.overCapacityDrops++;
a7b68ae7
RG
2027 try {
2028 closesocket(newsock);
2029 }
2030 catch(const PDNSException& e) {
e6a9dde5 2031 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2032 }
85c32340
BH
2033 return;
2034 }
2035
92011b8f 2036 if(t_remotes)
2037 t_remotes->push_back(addr);
49a699c4 2038 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 2039 if(!g_quiet)
e6a9dde5 2040 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2041
09e6702a 2042 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2043 try {
2044 closesocket(newsock);
2045 }
2046 catch(const PDNSException& e) {
e6a9dde5 2047 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2048 }
09e6702a
BH
2049 return;
2050 }
bd0289fc 2051 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2052 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2053 try {
2054 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2055 }
2056 catch(const PDNSException& e) {
e6a9dde5 2057 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2058 }
09e6702a
BH
2059 return;
2060 }
3ddb9247 2061
3897b9e1 2062 setNonBlocking(newsock);
f26bf547 2063 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 2064 tc->state=TCPConnection::BYTE0;
3ddb9247 2065
27ae2e3c
RG
2066 struct timeval ttd;
2067 Utility::gettimeofday(&ttd, 0);
2068 ttd.tv_sec += g_tcpTimeout;
c038218b 2069
27ae2e3c 2070 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
09e6702a
BH
2071 }
2072}
3ddb9247 2073
d187038c 2074static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 2075{
183eb877 2076 gettimeofday(&g_now, 0);
c29d820c
RG
2077 if (tv.tv_sec) {
2078 struct timeval diff = g_now - tv;
2079 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2080
c29d820c
RG
2081 if(delta > 1000.0) {
2082 g_stats.tooOldDrops++;
2083 return nullptr;
2084 }
b71b60ee 2085 }
2086
1bc3c142 2087 ++g_stats.qcounter;
d7f10541
BH
2088 if(fromaddr.sin4.sin_family==AF_INET6)
2089 g_stats.ipv6qcounter++;
1bc3c142
BH
2090
2091 string response;
93f0da94 2092 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2093 unsigned int ctag=0;
f57486f1 2094 uint32_t qhash = 0;
12aff2e5 2095 bool needECS = false;
5cc8371b 2096 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 2097 std::vector<std::string> policyTags;
5fd2577f 2098 LuaContext::LuaObject data;
5cc8371b
RG
2099 ComboAddress source = fromaddr;
2100 ComboAddress destination = destaddr;
67e31ebe 2101 string requestorId;
590388d2 2102 string deviceId;
16bbc6e3 2103 bool logQuery = false;
12aff2e5 2104#ifdef HAVE_PROTOBUF
02b47f43 2105 boost::uuids::uuid uniqueId;
02b47f43 2106 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2107 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2108 uniqueId = getUniqueID();
02b47f43 2109 needECS = true;
63341e8d 2110 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2111 uniqueId = getUniqueID();
02b47f43 2112 }
b773359c
RG
2113 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2114 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
12aff2e5 2115#endif
b40562da
RG
2116 EDNSSubnetOpts ednssubnet;
2117 bool ecsFound = false;
2118 bool ecsParsed = false;
08b02366
RG
2119 uint16_t ecsBegin = 0;
2120 uint16_t ecsEnd = 0;
70fb28d9
RG
2121 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2122 bool variable = false;
1bc3c142 2123 try {
02b47f43
RG
2124 DNSName qname;
2125 uint16_t qtype=0;
2126 uint16_t qclass=0;
1bc3c142 2127 uint32_t age;
c15ff3df 2128 bool qnameParsed=false;
8f7473d7 2129#ifdef MALLOC_TRACE
2130 /*
2131 static uint64_t last=0;
2132 if(!last)
2133 g_mtracer->clearAllocators();
2134 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2135 last=g_mtracer->getAllocs();
2136 cout<<g_mtracer->topAllocatorsString()<<endl;
2137 g_mtracer->clearAllocators();
2138 */
2139#endif
55a1378f 2140
70fb28d9 2141 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2142 try {
29e6303a 2143 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2144 bool xpfFound = false;
2145
2146 ecsFound = false;
2147
2148 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2149 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2150 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2151
c15ff3df
RG
2152 qnameParsed = true;
2153 ecsParsed = true;
12aff2e5 2154
70fb28d9 2155 if(t_pdl) {
12aff2e5 2156 try {
70fb28d9 2157 if (t_pdl->d_gettag_ffi) {
f1c7929a 2158 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
70fb28d9
RG
2159 }
2160 else if (t_pdl->d_gettag) {
2161 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2162 }
12aff2e5 2163 }
70fb28d9 2164 catch(const std::exception& e) {
12aff2e5 2165 if(g_logCommonErrors)
e6a9dde5 2166 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2167 }
8ea8c302 2168 }
b2eacd67 2169 }
70fb28d9 2170 catch(const std::exception& e)
b2eacd67 2171 {
2172 if(g_logCommonErrors)
e6a9dde5 2173 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2174 }
12ce523e 2175 }
3ddb9247 2176
02b47f43 2177 bool cacheHit = false;
1fbc6dc5 2178 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2179#ifdef HAVE_PROTOBUF
b773359c 2180 if (t_protobufServers) {
d362f7c1 2181 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2182 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2183 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
b773359c 2184 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
b790ef3d 2185 }
d9d3f9c1
RG
2186 }
2187#endif /* HAVE_PROTOBUF */
02b47f43 2188
70fb28d9
RG
2189 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2190 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2191 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2192 vState valState;
c15ff3df 2193 if (qnameParsed) {
08b02366 2194 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2195 }
2196 else {
08b02366 2197 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2198 }
2199
d9d3f9c1 2200 if (cacheHit) {
8467ec26
KM
2201 if(valState == Bogus) {
2202 if(t_bogusremotes)
2203 t_bogusremotes->push_back(source);
2204 if(t_bogusqueryring)
2205 t_bogusqueryring->push_back(make_pair(qname, qtype));
2206 }
2207
d9d3f9c1 2208#ifdef HAVE_PROTOBUF
b773359c 2209 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2210 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2211 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2212 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2213 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
c29d820c
RG
2214 if (g_useKernelTimestamp && tv.tv_sec) {
2215 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2216 }
2217 else {
2218 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2219 }
d362f7c1
RG
2220 pbMessage->setRequestorId(requestorId);
2221 pbMessage->setDeviceId(deviceId);
b773359c 2222 protobufLogResponse(*pbMessage);
02b47f43 2223 }
d9d3f9c1 2224#endif /* HAVE_PROTOBUF */
49a3500d 2225 if(!g_quiet)
e6a9dde5 2226 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2227
1bc3c142
BH
2228 g_stats.packetCacheHits++;
2229 SyncRes::s_queries++;
2230 ageDNSPacket(response, age);
b71b60ee 2231 struct msghdr msgh;
2232 struct iovec iov;
2233 char cbuf[256];
2234 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2235 msgh.msg_control=NULL;
2236
cbc03320 2237 if(g_fromtosockets.count(fd)) {
fbe2a2e0 2238 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 2239 }
cbc03320 2240 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2241 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2242
97bee66d 2243 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2244 struct dnsheader tmpdh;
2245 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2246 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2247 }
08f3f638 2248 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2249 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2250 return 0;
2251 }
3ddb9247 2252 }
1bc3c142 2253 catch(std::exception& e) {
e6a9dde5 2254 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2255 return 0;
2256 }
3ddb9247 2257
f26bf547 2258 if(t_pdl) {
5cc8371b 2259 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2260 if(!g_quiet)
e6a9dde5 2261 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2262 g_stats.policyDrops++;
2263 return 0;
2264 }
2265 }
2266
1bc3c142 2267 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2268 if(!g_quiet)
e6a9dde5 2269 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2270
1bc3c142
BH
2271 g_stats.overCapacityDrops++;
2272 return 0;
2273 }
3ddb9247 2274
9a864da4 2275 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
1bc3c142 2276 dc->setSocket(fd);
49a3500d 2277 dc->d_tag=ctag;
e9f63d47 2278 dc->d_qhash=qhash;
5cc8371b
RG
2279 dc->setRemote(fromaddr);
2280 dc->setSource(source);
b71b60ee 2281 dc->setLocal(destaddr);
5cc8371b 2282 dc->setDestination(destination);
1bc3c142 2283 dc->d_tcp=false;
b40562da
RG
2284 dc->d_ecsFound = ecsFound;
2285 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2286 dc->d_ecsBegin = ecsBegin;
2287 dc->d_ecsEnd = ecsEnd;
b40562da 2288 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2289 dc->d_ttlCap = ttlCap;
2290 dc->d_variable = variable;
aa7929a3 2291#ifdef HAVE_PROTOBUF
b773359c 2292 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2293 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2294 }
67e31ebe 2295 dc->d_requestorId = requestorId;
590388d2 2296 dc->d_deviceId = deviceId;
c29d820c 2297 dc->d_kernelTimestamp = tv;
aa7929a3
RG
2298#endif
2299
9a864da4 2300 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2301 return 0;
3ddb9247
PD
2302}
2303
b71b60ee 2304
d187038c 2305static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2306{
a683e8bd 2307 ssize_t len;
12c2f2b9 2308 static const size_t maxIncomingQuerySize = 512;
04896b99 2309 static thread_local std::string data;
5db529f8 2310 ComboAddress fromaddr;
b71b60ee 2311 struct msghdr msgh;
2312 struct iovec iov;
2313 char cbuf[256];
390f1dab 2314 bool firstQuery = true;
b71b60ee 2315
c0a00acd
RG
2316 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2317 data.resize(maxIncomingQuerySize);
2318 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2319 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2320
c0a00acd 2321 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2322
c0a00acd 2323 firstQuery = false;
390f1dab 2324
c0a00acd
RG
2325 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2326 g_stats.ignoredCount++;
2327 if (!g_quiet) {
2328 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2329 }
2330 return;
04896b99 2331 }
04896b99 2332
c0a00acd
RG
2333 if (msgh.msg_flags & MSG_TRUNC) {
2334 g_stats.truncatedDrops++;
2335 if (!g_quiet) {
2336 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2337 }
2338 return;
ba892c7f 2339 }
b23b8614 2340
c0a00acd
RG
2341 if(t_remotes) {
2342 t_remotes->push_back(fromaddr);
2343 }
81859ba5 2344
c0a00acd
RG
2345 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2346 if(!g_quiet) {
2347 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2348 }
3ddb9247 2349
c0a00acd
RG
2350 g_stats.unauthorizedUDP++;
2351 return;
5db529f8 2352 }
c0a00acd
RG
2353 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2354 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2355 if(!g_quiet) {
2356 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2357 }
2358
2359 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2360 return;
3abcdab2 2361 }
c0a00acd
RG
2362
2363 try {
2364 data.resize(static_cast<size_t>(len));
2365 dnsheader* dh=(dnsheader*)&data[0];
2366
2367 if(dh->qr) {
2368 g_stats.ignoredCount++;
2369 if(g_logCommonErrors) {
2370 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2371 }
2372 }
2373 else if(dh->opcode) {
2374 g_stats.ignoredCount++;
2375 if(g_logCommonErrors) {
2376 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2377 }
a6147cd2 2378 }
c0f9be19
RG
2379 else if (dh->qdcount == 0) {
2380 g_stats.emptyQueriesCount++;
2381 if(g_logCommonErrors) {
2382 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2383 }
2384 }
a6147cd2 2385 else {
c0a00acd
RG
2386 struct timeval tv={0,0};
2387 HarvestTimestamp(&msgh, &tv);
2388 ComboAddress dest;
2389 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2390 auto loc = rplookup(g_listenSocketsAddresses, fd);
2391 if(HarvestDestinationAddress(&msgh, &dest)) {
2392 // but.. need to get port too
2393 if(loc) {
2394 dest.sin4.sin_port = loc->sin4.sin_port;
2395 }
a6147cd2 2396 }
2397 else {
c0a00acd
RG
2398 if(loc) {
2399 dest = *loc;
2400 }
2401 else {
2402 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2403 socklen_t slen = dest.getSocklen();
2404 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2405 }
2406 }
2407
2408 if(g_weDistributeQueries) {
2409 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2410 }
2411 else {
144040be 2412 ++s_threadInfos[t_id].numberOfDistributedQueries;
c0a00acd 2413 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2414 }
2415 }
c0a00acd 2416 }
16ce7f18 2417 catch(const MOADNSException &mde) {
c0a00acd
RG
2418 g_stats.clientParseError++;
2419 if(g_logCommonErrors) {
2420 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2421 }
2422 }
2423 catch(const std::runtime_error& e) {
2424 g_stats.clientParseError++;
2425 if(g_logCommonErrors) {
2426 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2427 }
5db529f8
BH
2428 }
2429 }
c0a00acd
RG
2430 else {
2431 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2432 if(firstQuery && errno == EAGAIN) {
2433 g_stats.noPacketError++;
2434 }
390f1dab 2435
c0a00acd
RG
2436 break;
2437 }
ac0e821b 2438 }
5db529f8
BH
2439}
2440
adb6cd72 2441static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2442{
37d3f960 2443 int fd;
f28307ad 2444 vector<string>locals;
2e3d8a19 2445 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2446
f28307ad 2447 if(locals.empty())
3f81d239 2448 throw PDNSException("No local address specified");
3ddb9247 2449
f28307ad 2450 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2451 ServiceTuple st;
2452 st.port=::arg().asNum("local-port");
2453 parseService(*i, st);
3ddb9247 2454
32252594
BH
2455 ComboAddress sin;
2456
d38e2ba9 2457 sin.reset();
37d3f960 2458 sin.sin4.sin_family = AF_INET;
32252594 2459 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2460 sin.sin6.sin6_family = AF_INET6;
f71bc087 2461 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2462 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2463 }
2464
2465 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2466 if(fd<0)
3f81d239 2467 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2468
3897b9e1 2469 setCloseOnExec(fd);
a903b39c 2470
f28307ad 2471 int tmp=1;
810ff705 2472 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2473 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2474 exit(1);
f28307ad 2475 }
0dfa94ab 2476 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2477 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2478 }
2479
c8ddb7c2 2480#ifdef TCP_DEFER_ACCEPT
38ac0821 2481 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2482 if(i==locals.begin())
377602e3 2483 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2484 }
2485#endif
2486
fec7dd5a
SS
2487 if( ::arg().mustDo("non-local-bind") )
2488 Utility::setBindAny(AF_INET, fd);
2489
2332f42d 2490#ifdef SO_REUSEPORT
810ff705
RG
2491 if(g_reusePort) {
2492 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2493 throw PDNSException("SO_REUSEPORT: "+stringerror());
2494 }
2495#endif
2496
0735b17e
RG
2497 if (::arg().asNum("tcp-fast-open") > 0) {
2498#ifdef TCP_FASTOPEN
2499 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2500 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2501 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2502 }
2503#else
e6a9dde5 2504 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2505#endif
2506 }
2507
32252594 2508 sin.sin4.sin_port = htons(st.port);
a683e8bd 2509 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2510 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2511 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2512
3897b9e1 2513 setNonBlocking(fd);
49a699c4 2514 setSocketSendBuffer(fd, 65000);
37d3f960 2515 listen(fd, 128);
b243ca3b 2516 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2517 tcpSockets.insert(fd);
2518
84433b79 2519 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2520 // - fd is not that which we know here, but returned from accept()
3ddb9247 2521 if(sin.sin4.sin_family == AF_INET)
377602e3 2522 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2523 else
377602e3 2524 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2525 }
9c495589
BH
2526}
2527
b243ca3b 2528static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2529{
fec7dd5a 2530 int one=1;
f28307ad 2531 vector<string>locals;
2e3d8a19 2532 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2533
f28307ad 2534 if(locals.empty())
3f81d239 2535 throw PDNSException("No local address specified");
3ddb9247 2536
f28307ad 2537 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2538 ServiceTuple st;
2539 st.port=::arg().asNum("local-port");
2540 parseService(*i, st);
2541
37d3f960 2542 ComboAddress sin;
996c89cc 2543
d38e2ba9 2544 sin.reset();
37d3f960 2545 sin.sin4.sin_family = AF_INET;
32252594 2546 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2547 sin.sin6.sin6_family = AF_INET6;
f71bc087 2548 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2549 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2550 }
3ddb9247 2551
bb4bdbaf 2552 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2553 if(fd < 0) {
3f81d239 2554 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2555 }
915b0c39 2556 if (!setSocketTimestamps(fd))
e6a9dde5 2557 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2558
b71b60ee 2559 if(IsAnyAddress(sin)) {
cbc03320 2560 if(sin.sin4.sin_family == AF_INET)
2561 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2562 g_fromtosockets.insert(fd);
757d3179 2563#ifdef IPV6_RECVPKTINFO
cbc03320 2564 if(sin.sin4.sin_family == AF_INET6)
2565 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2566 g_fromtosockets.insert(fd);
757d3179 2567#endif
0dfa94ab 2568 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2569 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2570 }
b71b60ee 2571 }
fec7dd5a
SS
2572 if( ::arg().mustDo("non-local-bind") )
2573 Utility::setBindAny(AF_INET6, fd);
2574
3897b9e1 2575 setCloseOnExec(fd);
a903b39c 2576
4e9a20e6 2577 setSocketReceiveBuffer(fd, 250000);
32252594 2578 sin.sin4.sin_port = htons(st.port);
37d3f960 2579
2332f42d 2580
2573d4a6 2581#ifdef SO_REUSEPORT
810ff705 2582 if(g_reusePort) {
2332f42d 2583 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2584 throw PDNSException("SO_REUSEPORT: "+stringerror());
2585 }
2586#endif
90f9fbc0
RG
2587
2588 if (sin.isIPv4()) {
2589 try {
2590 setSocketIgnorePMTU(fd);
2591 }
2592 catch(const std::exception& e) {
2593 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2594 }
2595 }
2596
2597 socklen_t socklen=sin.getSocklen();
3ddb9247 2598 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2599 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2600
3897b9e1 2601 setNonBlocking(fd);
c2136bf0 2602
b243ca3b 2603 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2604 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2605 if(sin.sin4.sin_family == AF_INET)
377602e3 2606 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2607 else
377602e3 2608 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2609 }
c836dc19 2610}
caa6eefa 2611
d187038c 2612static void daemonize(void)
c836dc19
BH
2613{
2614 if(fork())
2615 exit(0); // bye bye
3ddb9247
PD
2616
2617 setsid();
c836dc19 2618
27a5ead5 2619 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2620 if(i < 0)
e6a9dde5 2621 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2622 else {
2623 dup2(i,0); /* stdin */
2624 dup2(i,1); /* stderr */
2625 dup2(i,2); /* stderr */
2626 close(i);
2627 }
288f4aa9 2628}
caa6eefa 2629
d187038c 2630static void usr1Handler(int)
c75a6a9e
BH
2631{
2632 statsWanted=true;
2633}
ae1b2e98 2634
d187038c 2635static void usr2Handler(int)
9170fbaf 2636{
f1f34cc2 2637 g_quiet= !g_quiet;
2638 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2639 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2640}
2641
d187038c 2642static void doStats(void)
c75a6a9e 2643{
16beeaa4
BH
2644 static time_t lastOutputTime;
2645 static uint64_t lastQueryCount;
d299d4f5 2646
2647 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2648 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2649
d299d4f5 2650 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2651 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2652 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2653 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2654 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2655
e6a9dde5 2656 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2657 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2658 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2659 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2660 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2661 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2662 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2663 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2664
e6a9dde5 2665 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2666 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2667
e6a9dde5 2668 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2669 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2670
144040be
RG
2671 size_t idx = 0;
2672 for (const auto& threadInfo : s_threadInfos) {
2673 if(threadInfo.isWorker) {
ad9fc3dc 2674 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
144040be
RG
2675 ++idx;
2676 }
2677 }
2678
16beeaa4
BH
2679 time_t now = time(0);
2680 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2681 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2682 }
2683 lastOutputTime = now;
2684 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2685 }
3ddb9247 2686 else if(statsWanted)
e6a9dde5 2687 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2688
c75a6a9e
BH
2689 statsWanted=false;
2690}
c836dc19 2691
29f0b1ce 2692static void houseKeeping(void *)
c836dc19 2693{
e4ae55e5 2694 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
3337c2f7
RG
2695 static thread_local int cleanCounter=0;
2696 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
2697 auto luaconfsLocal = g_luaconfs.getLocal();
2698
2699 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2700 // Loading the Lua config file already "refreshed" the TAs
2701 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2702 }
2703
cc59bce6 2704 try {
6b0d90ea 2705 if(s_running) {
cc59bce6 2706 return;
6b0d90ea 2707 }
cc59bce6 2708 s_running=true;
3ddb9247 2709
cc59bce6 2710 struct timeval now;
2711 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2712
2713 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
a6f7f5fe 2714 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2715 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2716
a6f7f5fe 2717 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2718
cc59bce6 2719 if(!((cleanCounter++)%40)) { // this is a full scan!
2720 time_t limit=now.tv_sec-300;
a712cb56 2721 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2722 }
2723 last_prune=time(0);
d67620e4 2724 }
3ddb9247 2725
cc59bce6 2726 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2727 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2728 if (!res)
2729 last_rootupdate=now.tv_sec;
cc59bce6 2730 }
3ddb9247 2731
b243ca3b 2732 if(isHandlerThread()) {
3ddb9247 2733
cc59bce6 2734 if(now.tv_sec - last_secpoll >= 3600) {
2735 try {
2736 doSecPoll(&last_secpoll);
2737 }
581d4ea3 2738 catch(std::exception& e)
2739 {
e6a9dde5 2740 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2741 }
47e9b74f 2742 catch(PDNSException& e)
2743 {
e6a9dde5 2744 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2745 }
d0992a65
CH
2746 catch(ImmediateServFailException &e)
2747 {
e6a9dde5 2748 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2749 }
47e9b74f 2750 catch(...)
2751 {
e6a9dde5 2752 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2753 }
18b73338 2754 }
e4ae55e5
PL
2755
2756 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2757 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2758 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2759 try {
2760 map<DNSName, dsmap_t> dsAnchors;
2761 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2762 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2763 lci.dsAnchors = dsAnchors;
2764 });
2765 }
2766 last_trustAnchorUpdate = now.tv_sec;
2767 } catch (const PDNSException &pe) {
2768 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2769 }
2770 }
d67620e4 2771 }
6b0d90ea 2772 s_running=false;
d67620e4 2773 }
cc59bce6 2774 catch(PDNSException& ae)
2775 {
2776 s_running=false;
e6a9dde5 2777 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2778 throw;
2779 }
779828c4 2780}
d6d5dea7 2781
d187038c 2782static void makeThreadPipes()
49a699c4 2783{
ee271fc4
RG
2784 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
2785 if (pipeBufferSize > 0) {
2786 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
2787 }
2788
b243ca3b
RG
2789 /* thread 0 is the handler / SNMP, we start at 1 */
2790 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2791 auto& threadInfos = s_threadInfos.at(n);
2792
49a699c4
BH
2793 int fd[2];
2794 if(pipe(fd) < 0)
2795 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2796
b243ca3b
RG
2797 threadInfos.pipes.readToThread = fd[0];
2798 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2799
49a699c4
BH
2800 if(pipe(fd) < 0)
2801 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2802
2803 threadInfos.pipes.readFromThread = fd[0];
2804 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2805
cf8cda18
RG
2806 if(pipe(fd) < 0)
2807 unixDie("Creating pipe for inter-thread communications");
d10307c5 2808
b243ca3b
RG
2809 threadInfos.pipes.readQueriesToThread = fd[0];
2810 threadInfos.pipes.writeQueriesToThread = fd[1];
2811
ee271fc4
RG
2812 if (pipeBufferSize > 0) {
2813 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
2814 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(errno)<<endl;
2815 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
2816 if (existingSize > 0) {
2817 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
2818 }
2819 }
2820 }
2821
b243ca3b 2822 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2823 unixDie("Making pipe for inter-thread communications non-blocking");
2824 }
49a699c4
BH
2825 }
2826}
2827
00c9b8c1
BH
2828struct ThreadMSG
2829{
2830 pipefunc_t func;
2831 bool wantAnswer;
2832};
2833
b4e76a18 2834void broadcastFunction(const pipefunc_t& func)
49a699c4 2835{
b243ca3b
RG
2836 /* This function might be called by the worker with t_id 0 during startup
2837 for the initialization of ACLs and domain maps. After that it should only
2838 be called by the handler. */
d77abca1 2839
b243ca3b
RG
2840 if (s_threadInfos.empty() && isHandlerThread()) {
2841 /* the handler and distributors will call themselves below, but
2842 during startup we get called while s_threadInfos has not been
2843 populated yet to update the ACL or domain maps, so we need to
2844 handle that case.
2845 */
2846 func();
2847 }
b4e76a18 2848
b243ca3b
RG
2849 unsigned int n = 0;
2850 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2851 if(n++ == t_id) {
b4e76a18 2852 func(); // don't write to ourselves!
49a699c4
BH
2853 continue;
2854 }
3ddb9247 2855
00c9b8c1
BH
2856 ThreadMSG* tmsg = new ThreadMSG();
2857 tmsg->func = func;
2858 tmsg->wantAnswer = true;
b243ca3b 2859 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2860 delete tmsg;
b243ca3b 2861
49a699c4 2862 unixDie("write to thread pipe returned wrong size or error");
b841314c 2863 }
3ddb9247 2864
49467864 2865 string* resp = nullptr;
b243ca3b 2866 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2867 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2868
49a699c4 2869 if(resp) {
49a699c4 2870 delete resp;
49467864 2871 resp = nullptr;
49a699c4
BH
2872 }
2873 }
2874}
06ea9015 2875
592d7ade 2876static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 2877{
144040be 2878 auto& targetInfo = s_threadInfos[target];
b243ca3b
RG
2879 if(!targetInfo.isWorker) {
2880 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2881 exit(1);
00c9b8c1 2882 }
d77abca1 2883
b243ca3b 2884 const auto& tps = targetInfo.pipes;
3ddb9247 2885
cf8cda18
RG
2886 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2887 if (written > 0) {
2888 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2889 delete tmsg;
2890 unixDie("write to thread pipe returned wrong size or error");
2891 }
2892 }
2893 else {
2894 int error = errno;
cf8cda18 2895 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 2896 return false;
cf8cda18 2897 } else {
592d7ade 2898 delete tmsg;
17634427 2899 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2900 }
b841314c 2901 }
592d7ade 2902
144040be
RG
2903 ++targetInfo.numberOfDistributedQueries;
2904
592d7ade
RG
2905 return true;
2906}
2907
144040be
RG
2908static unsigned int getWorkerLoad(size_t workerIdx)
2909{
2910 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
2911 if (mt != nullptr) {
2912 return mt->numProcesses();
2913 }
2914 return 0;
2915}
2916
2917static unsigned int selectWorker(unsigned int hash)
2918{
2919 if (s_balancingFactor == 0) {
2920 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
2921 }
2922
2923 /* we start with one, representing the query we are currently handling */
2924 double currentLoad = 1;
2925 std::vector<unsigned int> load(g_numWorkerThreads);
2926 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
2927 load[idx] = getWorkerLoad(idx);
2928 currentLoad += load[idx];
2929 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
2930 }
2931
2932 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
2933 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
2934
2935 unsigned int worker = hash % g_numWorkerThreads;
1b9d2d46 2936 /* at least one server has to be at or below the average load */
596bf482
RG
2937 if (load[worker] > targetLoad) {
2938 ++g_stats.rebalancedQueries;
2939 do {
2940 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
2941 worker = (worker + 1) % g_numWorkerThreads;
2942 }
2943 while(load[worker] > targetLoad);
144040be
RG
2944 }
2945
2946 return /* skip handler */ 1 + g_numDistributorThreads + worker;
2947}
2948
592d7ade
RG
2949// This function is only called by the distributor threads, when pdns-distributes-queries is set
2950void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2951{
2952 if (!isDistributorThread()) {
2953 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2954 exit(1);
2955 }
2956
2957 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
144040be 2958 unsigned int target = selectWorker(hash);
592d7ade
RG
2959
2960 ThreadMSG* tmsg = new ThreadMSG();
2961 tmsg->func = func;
2962 tmsg->wantAnswer = false;
2963
2964 if (!trySendingQueryToWorker(target, tmsg)) {
2965 /* if this function failed but did not raise an exception, it means that the pipe
2966 was full, let's try another one */
2967 unsigned int newTarget = 0;
2968 do {
2969 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
2970 } while (newTarget == target);
2971
2972 if (!trySendingQueryToWorker(newTarget, tmsg)) {
2973 g_stats.queryPipeFullDrops++;
2974 delete tmsg;
2975 }
2976 }
00c9b8c1 2977}
3427fa8a 2978
d187038c 2979static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2980{
f26bf547 2981 ThreadMSG* tmsg = nullptr;
3ddb9247 2982
cf8cda18 2983 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
2984 unixDie("read from thread pipe returned wrong size or error");
2985 }
3ddb9247 2986
2f22827a 2987 void *resp=0;
2988 try {
2989 resp = tmsg->func();
2990 }
2991 catch(std::exception& e) {
6d2010a8 2992 if(g_logCommonErrors)
e6a9dde5 2993 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2994 }
2995 catch(PDNSException& e) {
6d2010a8 2996 if(g_logCommonErrors)
e6a9dde5 2997 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2998 }
d7c676a5 2999 if(tmsg->wantAnswer) {
b243ca3b
RG
3000 const auto& threadInfo = s_threadInfos.at(t_id);
3001 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 3002 delete tmsg;
00c9b8c1 3003 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
3004 }
3005 }
3ddb9247 3006
00c9b8c1 3007 delete tmsg;
49a699c4 3008}
09e6702a 3009
13034931
BH
3010template<class T> void *voider(const boost::function<T*()>& func)
3011{
3012 return func();
3013}
3014
b3b5459d
BH
3015vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3016{
3017 a.insert(a.end(), b.begin(), b.end());
3018 return a;
3019}
3020
/* Appends every (name, value) pair of b to the end of a and returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >& a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
3026
3ddb9247
PD
3027vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3028{
3029 a.insert(a.end(), b.begin(), b.end());
3030 return a;
3031}
3032
92011b8f 3033
387b9ca6
RG
3034/*
3035 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
3036 reload the Lua script (not the Lua config) or change the current trace regex,
3037 and by the SNMP thread to gather metrics. */
b4e76a18 3038template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 3039{
b243ca3b 3040 if (!isHandlerThread()) {
788eeb4c 3041 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 3042 exit(1);
d77abca1
RG
3043 }
3044
b243ca3b 3045 unsigned int n = 0;
3427fa8a 3046 T ret=T();
b243ca3b
RG
3047 for (const auto& threadInfo : s_threadInfos) {
3048 if (n++ == t_id) {
3049 continue;
3050 }
3051
3052 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
3053 ThreadMSG* tmsg = new ThreadMSG();
3054 tmsg->func = boost::bind(voider<T>, func);
3055 tmsg->wantAnswer = true;
3ddb9247 3056
b841314c
RG
3057 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3058 delete tmsg;
3427fa8a 3059 unixDie("write to thread pipe returned wrong size or error");
b841314c 3060 }
3ddb9247 3061
49467864 3062 T* resp = nullptr;
3427fa8a
BH
3063 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3064 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3065
3427fa8a 3066 if(resp) {
3427fa8a
BH
3067 ret += *resp;
3068 delete resp;
49467864 3069 resp = nullptr;
3427fa8a
BH
3070 }
3071 }
3072 return ret;
3073}
3074
b4e76a18
RG
3075template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3076template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3077template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3078template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
5ac6d761 3079template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3427fa8a 3080
d187038c 3081static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3082{
fbfc1809
RG
3083 try {
3084 string remote;
3085 string msg=s_rcc.recv(&remote);
3086 RecursorControlParser rcp;
3087 RecursorControlParser::func_t* command;
3ddb9247 3088
fbfc1809 3089 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0 3090
fbfc1809
RG
3091 // If we are inside a chroot, we need to strip
3092 if (!arg()["chroot"].empty()) {
3093 size_t len = arg()["chroot"].length();
3094 remote = remote.substr(len);
3095 }
f0f3f0b0 3096
ab5c053d
BH
3097 s_rcc.send(answer, &remote);
3098 command();
3099 }
fbfc1809 3100 catch(const std::exception& e) {
e6a9dde5 3101 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3102 }
fbfc1809 3103 catch(const PDNSException& ae) {
e6a9dde5 3104 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3105 }
09e6702a
BH
3106}
3107
d187038c 3108static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3109{
0b18b22e 3110 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 3111 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3112
667f7e60 3113 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3114
a683e8bd 3115 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3116 if(ret > 0) {
667f7e60 3117 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3118 pident->inNeeded-=(size_t)ret;
825fa717 3119 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3120 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3121 PacketID pid=*pident;
3122 string msg=pident->inMSG;
3ddb9247 3123
bb4bdbaf 3124 t_fdm->removeReadFD(fd);
3ddb9247 3125 MT->sendEvent(pid, &msg);
09e6702a
BH
3126 }
3127 else {
667f7e60 3128 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3129 }
3130 }
3131 else {
667f7e60 3132 PacketID tmp=*pident;
bb4bdbaf 3133 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3134 string empty;
3135 MT->sendEvent(tmp, &empty); // this conveys error status
3136 }
3137}
3138
d187038c 3139static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3140{
0b18b22e 3141 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3142 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3143 if(ret > 0) {
a683e8bd 3144 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3145 if(pid->outPos==pid->outMSG.size()) {
3146 PacketID tmp=*pid;
bb4bdbaf 3147 t_fdm->removeWriteFD(fd);
09e6702a
BH
3148 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3149 }
3150 }
3151 else { // error or EOF
667f7e60 3152 PacketID tmp(*pid);
bb4bdbaf 3153 t_fdm->removeWriteFD(fd);
09e6702a 3154 string sent;
998a4334 3155 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3156 }
3157}
3158
34801ab1 3159// resend event to everybody chained onto it
d187038c 3160static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3161{
3162 if(iter->key.chain.empty())
3163 return;
e27e91a8 3164 // cerr<<"doResends called!\n";
34801ab1
BH
3165 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3166 resend.fd=-1;
3167 resend.id=*i;
e27e91a8 3168 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3169
34801ab1
BH
3170 MT->sendEvent(resend, &content);
3171 g_stats.chainResends++;
34801ab1
BH
3172 }
3173}
3174
d187038c 3175static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3176{
600fc20b 3177 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3178 ssize_t len;
fae8fe07
RG
3179 std::string packet;
3180 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3181 ComboAddress fromaddr;
09e6702a
BH
3182 socklen_t addrlen=sizeof(fromaddr);
3183
fae8fe07 3184 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3185
a683e8bd 3186 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3187 if(len < 0)
996c89cc 3188 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3189 else {
3ddb9247 3190 g_stats.serverParseError++;
09e6702a 3191 if(g_logCommonErrors)
e6a9dde5 3192 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3193 ": packet smaller than DNS header"<<endl;
998a4334 3194 }
34801ab1 3195
49a699c4 3196 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3197 string empty;
3198
3199 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3200 if(iter != MT->d_waiters.end())
34801ab1 3201 doResends(iter, pid, empty);
3ddb9247 3202
34801ab1 3203 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3204 return;
3ddb9247 3205 }
998a4334 3206
fae8fe07 3207 packet.resize(len);
998a4334 3208 dnsheader dh;
fae8fe07 3209 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3210
6da3b3ad
PD
3211 PacketID pident;
3212 pident.remote=fromaddr;
3213 pident.id=dh.id;
3214 pident.fd=fd;
34801ab1 3215
33a928af 3216 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3217 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3218 }
3219
3220 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3221 !dh.qr) { // one weird server
3222 pident.domain.clear();
3223 pident.type = 0;
3224 }
3225 else {
3226 try {
0b31e67e 3227 if(len > 12)
fae8fe07 3228 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3229 }
3230 catch(std::exception& e) {
3231 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3232 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3233 return;
34801ab1 3234 }
6da3b3ad 3235 }
34801ab1 3236
6da3b3ad
PD
3237 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3238 if(iter != MT->d_waiters.end()) {
3239 doResends(iter, pident, packet);
3240 }
c1da7976 3241
6da3b3ad 3242retryWithName:
4957a608 3243
6da3b3ad
PD
3244 if(!MT->sendEvent(pident, &packet)) {
3245 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3246 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3247 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3248 pident.domain == mthread->key.domain) {
6da3b3ad 3249 mthread->key.nearMisses++;
998a4334 3250 }
6da3b3ad
PD
3251
3252 // be a bit paranoid here since we're weakening our matching
3ddb9247 3253 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3254 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3255 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3256 pident.domain = mthread->key.domain;
3257 pident.type = mthread->key.type;
3258 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3259 }
09e6702a 3260 }
6da3b3ad
PD
3261 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3262 if(g_logCommonErrors) {
e6a9dde5 3263 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3264 }
09e6702a 3265 }
6da3b3ad
PD
3266 else if(fd >= 0) {
3267 t_udpclientsocks->returnSocket(fd);
3268 }
09e6702a
BH
3269}
3270
1f4abb20
BH
3271FDMultiplexer* getMultiplexer()
3272{
3273 FDMultiplexer* ret;
f26bf547 3274 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3275 try {
f26bf547 3276 ret=i.second();
1f4abb20
BH
3277 return ret;
3278 }
98d0ee4a 3279 catch(FDMultiplexerException &fe) {
e6a9dde5 3280 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3281 }
3282 catch(...) {
e6a9dde5 3283 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3284 }
1f4abb20 3285 }
e6a9dde5 3286 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3287 exit(1);
3288}
3289
3ddb9247 3290
d187038c 3291static string* doReloadLuaScript()
4485aa35 3292{
674cf0f6 3293 string fname= ::arg()["lua-dns-script"];
4485aa35 3294 try {
674cf0f6 3295 if(fname.empty()) {
f26bf547 3296 t_pdl.reset();
377602e3 3297 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3298 return new string("unloaded\n");
4485aa35
BH
3299 }
3300 else {
9694e14f
AT
3301 t_pdl = std::make_shared<RecursorLua4>();
3302 t_pdl->loadFile(fname);
4485aa35
BH
3303 }
3304 }
fdbf35ac 3305 catch(std::exception& e) {
e6a9dde5 3306 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3307 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3308 }
3ddb9247 3309
e6a9dde5 3310 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3311 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3312}
3313
49a699c4
BH
3314string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3315{
3ddb9247 3316 if(begin != end)
49a699c4 3317 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3318
0f39c1a3 3319 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3320}
49a699c4 3321
d187038c 3322static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3323try
3324{
3325 if(newRegex.empty()) {
f26bf547 3326 t_traceRegex.reset();
77499b05
BH
3327 return new string("unset\n");
3328 }
3329 else {
f26bf547 3330 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3331 return new string("ok\n");
3332 }
3333}
3f81d239 3334catch(PDNSException& ae)
77499b05
BH
3335{
3336 return new string(ae.reason+"\n");
3337}
3338
3339string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3340{
3341 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3342}
3343
4e9a20e6 3344static void checkLinuxIPv6Limits()
3345{
3346#ifdef __linux__
3347 string line;
3348 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3349 int lim=std::stoi(line);
4e9a20e6 3350 if(lim < 16384) {
e6a9dde5 3351 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3352 }
3353 }
3354#endif
3355}
36849ff2 3356static void checkOrFixFDS()
4e9a20e6 3357{
c0063e60 3358 unsigned int availFDs=getFilenumLimit();
3359 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3360
3361 if(wantFDs > availFDs) {
067ad20e 3362 unsigned int hardlimit= getFilenumLimit(true);
3363 if(hardlimit >= wantFDs) {
c0063e60 3364 setFilenumLimit(wantFDs);
e6a9dde5 3365 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3366 }
3367 else {
067ad20e 3368 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3369 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3370 g_maxMThreads = newval;
067ad20e 3371 setFilenumLimit(hardlimit);
36849ff2 3372 }
3373 }
4e9a20e6 3374}
77499b05 3375
c390b2da 3376static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3377
f26bf547 3378static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3379{
3380 t_allowFrom = ng;
f26bf547 3381 return nullptr;
49a699c4
BH
3382}
3383
dbd23fc2
BH
3384int g_argc;
3385char** g_argv;
3386
18af64a8 3387void parseACLs()
f7c1d4e3 3388{
18af64a8 3389 static bool l_initialized;
3ddb9247 3390
49a699c4 3391 if(l_initialized) { // only reload configuration file on second call
18af64a8 3392 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3393 if(::arg()["config-name"]!="") {
3394 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3395 }
18af64a8 3396 cleanSlashes(configname);
3ddb9247
PD
3397
3398 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3399 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3400 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3401 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3402 ::arg().preParse(g_argc, g_argv, "include-dir");
3403
3404 // then process includes
3405 std::vector<std::string> extraConfigs;
242b90e1
AT
3406 ::arg().gatherIncludes(extraConfigs);
3407
1dc8f4d0 3408 for(const std::string& fn : extraConfigs) {
7e818521 3409 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3410 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3411 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3412 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3413 }
ca2c884c
AT
3414
3415 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3416 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3417 }
49a699c4 3418
f26bf547
RG
3419 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3420 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3421
2c95fc65
BH
3422 if(!::arg()["allow-from-file"].empty()) {
3423 string line;
2c95fc65
BH
3424 ifstream ifs(::arg()["allow-from-file"].c_str());
3425 if(!ifs) {
9c61b9d0 3426 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3427 }
3428
3429 string::size_type pos;
3430 while(getline(ifs,line)) {
3431 pos=line.find('#');
3432 if(pos!=string::npos)
3433 line.resize(pos);
3434 trim(line);
3435 if(line.empty())
3436 continue;
3437
18af64a8 3438 allowFrom->addMask(line);
2c95fc65 3439 }
e6a9dde5 3440 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3441 }
3442 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3443 vector<string> ips;
3444 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3445
e6a9dde5 3446 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3447 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3448 allowFrom->addMask(*i);
f7c1d4e3 3449 if(i!=ips.begin())
e6a9dde5
PL
3450 g_log<<Logger::Warning<<", ";
3451 g_log<<Logger::Warning<<*i;
f7c1d4e3 3452 }
e6a9dde5 3453 g_log<<Logger::Warning<<endl;
f7c1d4e3 3454 }
49a699c4 3455 else {
3ddb9247 3456 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3457 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3458 allowFrom = nullptr;
49a699c4 3459 }
3ddb9247 3460
49a699c4 3461 g_initialAllowFrom = allowFrom;
d7dae798 3462 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3463 oldAllowFrom = nullptr;
3ddb9247 3464
49a699c4 3465 l_initialized = true;
18af64a8
BH
3466}
3467
795215f2 3468
756e82cf 3469static void setupDelegationOnly()
3470{
3471 vector<string> parts;
3472 stringtok(parts, ::arg()["delegation-only"], ", \t");
3473 for(const auto& p : parts) {
9065eb05 3474 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3475 }
3476}
795215f2 3477
8fd25133
RG
3478static std::map<unsigned int, std::set<int> > parseCPUMap()
3479{
3480 std::map<unsigned int, std::set<int> > result;
3481
3482 const std::string value = ::arg()["cpu-map"];
3483
3484 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3485 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3486 return result;
3487 }
3488
3489 std::vector<std::string> parts;
3490
3491 stringtok(parts, value, " \t");
3492
3493 for(const auto& part : parts) {
3494 if (part.find('=') == string::npos)
3495 continue;
3496
3497 try {
3498 auto headers = splitField(part, '=');
3499 trim(headers.first);
3500 trim(headers.second);
3501
3502 unsigned int threadId = pdns_stou(headers.first);
3503 std::vector<std::string> cpus;
3504
3505 stringtok(cpus, headers.second, ",");
3506
3507 for(const auto& cpu : cpus) {
3508 int cpuId = std::stoi(cpu);
3509
3510 result[threadId].insert(cpuId);
3511 }
3512 }
3513 catch(const std::exception& e) {
e6a9dde5 3514 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3515 }
3516 }
3517
3518 return result;
3519}
3520
3521static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3522{
3523 const auto& cpuMapping = cpusMap.find(n);
3524 if (cpuMapping != cpusMap.cend()) {
3525 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3526 if (rc == 0) {
e6a9dde5 3527 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3528 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3529 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3530 }
e6a9dde5 3531 g_log<<Logger::Info<<endl;
8fd25133
RG
3532 }
3533 else {
e6a9dde5 3534 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3535 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3536 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3537 }
e6a9dde5 3538 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3539 }
3540 }
3541}
3542
af1377b7
NC
3543#ifdef NOD_ENABLED
3544static void setupNODThread()
3545{
3546 if (g_nodEnabled) {
b78727c6
NC
3547 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3548 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3549 try {
3550 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3551 }
3552 catch (const PDNSException& e) {
3553 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3554 _exit(1);
3555 }
3556 if (!t_nodDBp->init()) {
3557 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3558 _exit(1);
3559 }
41c542ec 3560 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 3561 t.detach();
ca2526f5 3562 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
3563 }
3564 if (g_udrEnabled) {
b78727c6
NC
3565 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3566 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3567 try {
3568 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3569 }
3570 catch (const PDNSException& e) {
3571 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3572 _exit(1);
3573 }
3574 if (!t_udrDBp->init()) {
3575 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3576 _exit(1);
3577 }
3578 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 3579 t.detach();
ca2526f5 3580 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
3581 }
3582}
3583
3584void parseNODWhitelist(const std::string& wlist)
3585{
3586 vector<string> parts;
3587 stringtok(parts, wlist, ",; ");
3588 for(const auto& a : parts) {
3589 g_nodDomainWL.add(DNSName(a));
3590 }
3591}
3592
3593static void setupNODGlobal()
3594{
3595 // Setup NOD subsystem
3596 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3597 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3598 g_nodLog = ::arg().mustDo("new-domain-log");
3599 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3600
3601 // Setup Unique DNS Response subsystem
3602 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3603 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3604}
3605#endif /* NOD_ENABLED */
3606
d187038c 3607static int serviceMain(int argc, char*argv[])
18af64a8 3608{
e6a9dde5
PL
3609 g_log.setName(s_programname);
3610 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3611 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3612
3613 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3614 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3615 if(val >= 0)
e6a9dde5 3616 g_log.setFacility(val);
18af64a8 3617 else
e6a9dde5 3618 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3619 }
3620
ba1a571d 3621 showProductVersion();
3afde9b2 3622
06ea9015 3623 g_disthashseed=dns_random(0xffffffff);
3624
b7ef5828
PL
3625 checkLinuxIPv6Limits();
3626 try {
3627 vector<string> addrs;
3628 if(!::arg()["query-local-address6"].empty()) {
3629 SyncRes::s_doIPv6=true;
e6a9dde5 3630 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3631
3632 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3633 for(const string& addr : addrs) {
3634 g_localQueryAddresses6.push_back(ComboAddress(addr));
3635 }
3636 }
3637 else {
e6a9dde5 3638 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3639 }
3640 addrs.clear();
3641 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3642 for(const string& addr : addrs) {
3643 g_localQueryAddresses4.push_back(ComboAddress(addr));
3644 }
3645 }
3646 catch(std::exception& e) {
e6a9dde5 3647 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3648 exit(99);
3649 }
3650
e48c6b8a
PL
3651 // keep this ABOVE loadRecursorLuaConfig!
3652 if(::arg()["dnssec"]=="off")
3653 g_dnssecmode=DNSSECMode::Off;
3654 else if(::arg()["dnssec"]=="process-no-validate")
3655 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3656 else if(::arg()["dnssec"]=="process")
3657 g_dnssecmode=DNSSECMode::Process;
3658 else if(::arg()["dnssec"]=="validate")
3659 g_dnssecmode=DNSSECMode::ValidateAll;
3660 else if(::arg()["dnssec"]=="log-fail")
3661 g_dnssecmode=DNSSECMode::ValidateForLog;
3662 else {
e6a9dde5 3663 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3664 exit(1);
3665 }
3666
9a3ab3e4
KM
3667 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3668 if (g_signatureInceptionSkew < 0) {
3669 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3670 exit(1);
3671 }
3672
e48c6b8a 3673 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3674 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3675
a6f7f5fe 3676 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3677 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3678
3679 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3680 try {
e6ec15bf 3681 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3682 }
3683 catch (PDNSException &e) {
e6a9dde5 3684 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3685 exit(1);
3686 }
ad42489c 3687
18af64a8 3688 parseACLs();
d6f3fcfa 3689 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 3690
eb5bae86 3691 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3692 vector<string> ips;
3693 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3694 ips.push_back("0.0.0.0");
3695 ips.push_back("::");
c36bc97a 3696
e6a9dde5 3697 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3698 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3699 SyncRes::addDontQuery(*i);
eb5bae86 3700 if(i!=ips.begin())
e6a9dde5
PL
3701 g_log<<Logger::Warning<<", ";
3702 g_log<<Logger::Warning<<*i;
eb5bae86 3703 }
e6a9dde5 3704 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3705 }
3706
f7c1d4e3 3707 g_quiet=::arg().mustDo("quiet");
3ddb9247 3708
b243ca3b 3709 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3710 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3711 if(g_weDistributeQueries) {
b243ca3b 3712 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3713 }
3ddb9247 3714
756e82cf 3715 setupDelegationOnly();
b33c2462 3716 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3717
77499b05
BH
3718 if(::arg()["trace"]=="fail") {
3719 SyncRes::setDefaultLogMode(SyncRes::Store);
3720 }
3721 else if(::arg().mustDo("trace")) {
3722 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3723 ::arg().set("quiet")="no";
3724 g_quiet=false;
3e9c6c0a 3725 g_dnssecLOG=true;
f7c1d4e3 3726 }
43a9b290
PL
3727 string myHostname = getHostname();
3728 if (myHostname == "UNKNOWN"){
3729 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3730 myHostname = "";
d0983bff 3731 }
3ddb9247 3732
aadceba8 3733 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
5cf4b2e7 3734 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
aadceba8 3735
1051f8a9
BH
3736 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3737
f7c1d4e3 3738 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
b9473937 3739 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
63637fd8 3740 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3741 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3742 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3743 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3744 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3745 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3746 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3747 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3748 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3749 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3750 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3751 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3752 if(SyncRes::s_serverID.empty()) {
d0983bff 3753 SyncRes::s_serverID = myHostname;
f7c1d4e3 3754 }
3ddb9247 3755
e9f9b8ec
RG
3756 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3757 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
c9783016 3758 SyncRes::clearECSStats();
fd8898fb 3759 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3760 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
ed9019c9 3761 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
e9f9b8ec 3762
8a3a3822
RG
3763 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3764 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3765 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3766 }
3767 else {
3768 bool found = false;
3769 for (const auto& addr : g_localQueryAddresses4) {
3770 if (!IsAnyAddress(addr)) {
3771 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3772 found = true;
3773 break;
3774 }
3775 }
3776 if (!found) {
3777 for (const auto& addr : g_localQueryAddresses6) {
3778 if (!IsAnyAddress(addr)) {
3779 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3780 found = true;
3781 break;
3782 }
3783 }
3784 if (!found) {
3785 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3786 }
3787 }
3788 }
3789
2fe3354d
CH
3790 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3791 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3792 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3793
5cc8371b 3794 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3795 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3796
5b0ddd18 3797 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3798
49a699c4 3799 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3800
08f3f638 3801 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3802
f7c1d4e3 3803 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3804 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3805
3806 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3807 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3808
b3adda56
PD
3809 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3810
b243ca3b 3811 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3812 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3813 if (g_numWorkerThreads < 1) {
e6a9dde5 3814 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3815 g_numWorkerThreads = 1;
3816 }
3817
b243ca3b 3818 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3819 g_maxMThreads = ::arg().asNum("max-mthreads");
3820
00b8cadc
RG
3821 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3822
0ec489bf 3823 g_statisticsInterval = ::arg().asNum("statistics-interval");
3824
559b6c93
PL
3825 {
3826 SuffixMatchNode dontThrottleNames;
3827 vector<string> parts;
3828 stringtok(parts, ::arg()["dont-throttle-names"]);
3829 for (const auto &p : parts) {
3830 dontThrottleNames.add(DNSName(p));
3831 }
3832 g_dontThrottleNames.setState(dontThrottleNames);
3833
3834 NetmaskGroup dontThrottleNetmasks;
3835 stringtok(parts, ::arg()["dont-throttle-netmasks"]);
3836 for (const auto &p : parts) {
3837 dontThrottleNetmasks.addMask(Netmask(p));
3838 }
3839 g_dontThrottleNetmasks.setState(dontThrottleNetmasks);
3840 }
3841
144040be 3842 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
078be17f
RG
3843 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
3844 s_balancingFactor = 0.0;
3845 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
3846 }
144040be 3847
810ff705
RG
3848#ifdef SO_REUSEPORT
3849 g_reusePort = ::arg().mustDo("reuseport");
3850#endif
3851
b243ca3b 3852 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3853
b243ca3b
RG
3854 if (g_reusePort) {
3855 if (g_weDistributeQueries) {
3856 /* first thread is the handler, then distributors */
3857 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3858 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3859 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3860 makeUDPServerSockets(deferredAdds);
adb6cd72 3861 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3862 }
3863 }
3864 else {
3865 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3866 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3867 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3868 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3869 makeUDPServerSockets(deferredAdds);
adb6cd72 3870 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3871 }
810ff705
RG
3872 }
3873 }
3874 else {
c47f201b 3875 std::set<int> tcpSockets;
b243ca3b
RG
3876 /* we don't have reuseport so we can only open one socket per
3877 listening addr:port and everyone will listen on it */
3878 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3879 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3880
3881 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3882 needs to listen to the shared sockets */
3883 if (g_weDistributeQueries) {
3884 /* first thread is the handler, then distributors */
3885 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3886 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3887 }
3888 }
3889 else {
3890 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3891 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3892 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3893 }
3894 }
810ff705 3895 }
815099b2 3896
af1377b7
NC
3897#ifdef NOD_ENABLED
3898 // Setup newly observed domain globals
3899 setupNODGlobal();
3900#endif /* NOD_ENABLED */
3901
677e2a46
BH
3902 int forks;
3903 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3904 if(!fork()) // we are child
3905 break;
3906 }
3ddb9247 3907
f7c1d4e3 3908 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3909 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3910 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
3911 daemonize();
3912 }
3913 signal(SIGUSR1,usr1Handler);
3914 signal(SIGUSR2,usr2Handler);
3915 signal(SIGPIPE,SIG_IGN);
810ff705 3916
a6414fdc 3917 checkOrFixFDS();
3ddb9247 3918
d1b28475
KM
3919#ifdef HAVE_LIBSODIUM
3920 if (sodium_init() == -1) {
e6a9dde5 3921 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
3922 exit(99);
3923 }
3924#endif
3925
3afde9b2
PL
3926 openssl_thread_setup();
3927 openssl_seed();
e97cb679
AT
3928 /* setup rng before chroot */
3929 dns_random_init();
3afde9b2 3930
bdbb07e0 3931 if(::arg()["server-id"].empty()) {
d0983bff 3932 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
3933 }
3934
138435cb
BH
3935 int newgid=0;
3936 if(!::arg()["setgid"].empty())
3937 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3938 int newuid=0;
3939 if(!::arg()["setuid"].empty())
3940 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3941
f1d6a7ce
KM
3942 Utility::dropGroupPrivs(newuid, newgid);
3943
138435cb 3944 if (!::arg()["chroot"].empty()) {
75336810
PL
3945#ifdef HAVE_SYSTEMD
3946 char *ns;
3947 ns = getenv("NOTIFY_SOCKET");
3948 if (ns != nullptr) {
e6a9dde5 3949 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
3950 exit(1);
3951 }
3952#endif
138435cb 3953 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 3954 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
3955 exit(1);
3956 }
f0f3f0b0 3957 else
377602e3 3958 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
3959 }
3960
f0f3f0b0
PL
3961 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3962 if(!s_pidfname.empty())
3963 unlink(s_pidfname.c_str()); // remove possible old pid file
3964 writePid();
3965
3966 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3967
f1d6a7ce 3968 Utility::dropUserPrivs(newuid);
1f2b341e
RG
3969 try {
3970 /* we might still have capabilities remaining, for example if we have been started as root
3971 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
3972 like CAP_NET_BIND_SERVICE.
3973 */
3974 dropCapabilities();
3975 }
3976 catch(const std::exception& e) {
3977 g_log<<Logger::Warning<<e.what()<<endl;
3978 }
c0063e60 3979
e6ec15bf
RG
3980 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3981
49a699c4 3982 makeThreadPipes();
3ddb9247 3983
5d4dd7fe
BH
3984 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3985 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 3986 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 3987 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 3988
c29d820c
RG
3989 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
3990
563517f3
RG
3991 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
3992 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
3993 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
3994 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
72259676 3995
d705aad9
RG
3996 if (::arg().mustDo("snmp-agent")) {
3997 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3998 g_snmpAgent->run();
3999 }
4000
b47026fd 4001 int port = ::arg().asNum("udp-source-port-min");
58da9034 4002 if(port < 1024 || port > 65535){
e6a9dde5 4003 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
4004 exit(99); // this isn't going to fix itself either
4005 }
4006 s_minUdpSourcePort = port;
b47026fd 4007 port = ::arg().asNum("udp-source-port-max");
58da9034 4008 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 4009 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
4010 exit(99); // this isn't going to fix itself either
4011 }
4012 s_maxUdpSourcePort = port;
4013 std::vector<string> parts {};
b47026fd 4014 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
4015 for (const auto &part : parts)
4016 {
4017 port = std::stoi(part);
58da9034 4018 if(port < 1024 || port > 65535){
e6a9dde5 4019 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
4020 exit(99); // this isn't going to fix itself either
4021 }
4022 s_avoidUdpSourcePorts.insert(port);
4023 }
4024
b243ca3b 4025 unsigned int currentThreadId = 1;
8fd25133 4026 const auto cpusMap = parseCPUMap();
d77abca1 4027
c3828c03 4028 if(g_numThreads == 1) {
e6a9dde5 4029 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
4030#ifdef HAVE_SYSTEMD
4031 sd_notify(0, "READY=1");
4032#endif
b243ca3b
RG
4033
4034 /* This thread handles the web server, carbon, statistics and the control channel */
4035 auto& handlerInfos = s_threadInfos.at(0);
4036 handlerInfos.isHandler = true;
c390b2da 4037 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
4038
4039 setCPUMap(cpusMap, currentThreadId, pthread_self());
4040
4041 auto& infos = s_threadInfos.at(currentThreadId);
4042 infos.isListener = true;
4043 infos.isWorker = true;
c390b2da 4044 recursorThread(currentThreadId++, "worker");
76698c6e
BH
4045 }
4046 else {
8fd25133 4047
b243ca3b
RG
4048 if (g_weDistributeQueries) {
4049 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4050 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4051 auto& infos = s_threadInfos.at(currentThreadId);
4052 infos.isListener = true;
c390b2da 4053 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
4054
4055 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4056 }
4057 }
8fd25133 4058
62b549e0
RG
4059 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4060
b243ca3b
RG
4061 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4062 auto& infos = s_threadInfos.at(currentThreadId);
4063 infos.isListener = g_weDistributeQueries ? false : true;
4064 infos.isWorker = true;
c390b2da 4065 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b
RG
4066
4067 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 4068 }
b243ca3b 4069
6b6720de
PL
4070#ifdef HAVE_SYSTEMD
4071 sd_notify(0, "READY=1");
4072#endif
b243ca3b
RG
4073
4074 /* This thread handles the web server, carbon, statistics and the control channel */
4075 auto& infos = s_threadInfos.at(0);
4076 infos.isHandler = true;
c390b2da 4077 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b
RG
4078
4079 s_threadInfos.at(0).thread.join();
bb4bdbaf 4080 }
bb4bdbaf
BH
4081 return 0;
4082}
4083
// Entry point executed by every recursor thread.
//   n          - index of this thread; stored in t_id and used to look up its slot in s_threadInfos
//   threadName - short role label; the thread is named "pdns-r/" + threadName
// The function initialises this thread's t_* state, registers file descriptors with the
// multiplexer according to the thread's role flags (isHandler / isListener / isWorker), and then
// runs an endless event loop. There is no normal exit from the loop: the function only returns
// (always 0) from one of the exception handlers at the bottom.
c390b2da 4084static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
4085try
4086{
d77abca1 4087 t_id=n;
// .at() throws if n has no slot in s_threadInfos; that is caught by the std::exception handler below.
b243ca3b 4088 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
4089
4090 static string threadPrefix = "pdns-r/";
519f5484 4091 setThreadName(threadPrefix + threadName);
c390b2da 4092
49a699c4 4093 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 4094 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 4095 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
4096 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4097 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 4098 primeHints();
3ddb9247 4099
f26bf547 4100 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 4101
e6a9dde5 4102 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 4103
// Newly-observed-domain setup is compiled in only with NOD_ENABLED and runs only on worker threads.
af1377b7 4104#ifdef NOD_ENABLED
41c542ec
NC
4105 if (threadInfo.isWorker)
4106 setupNODThread();
af1377b7 4107#endif /* NOD_ENABLED */
c1751a59
RG
4108
4109 /* the listener threads handle TCP queries */
// Load the Lua script (if configured) on worker and listener threads; a load failure that throws
// std::exception is logged and terminates the process via _exit(99).
4110 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
4111 try {
4112 if(!::arg()["lua-dns-script"].empty()) {
4113 t_pdl = std::make_shared<RecursorLua4>();
4114 t_pdl->loadFile(::arg()["lua-dns-script"]);
4115 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4116 }
4117 }
4118 catch(std::exception &e) {
4119 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4120 _exit(99);
674cf0f6 4121 }
674cf0f6 4122 }
3ddb9247
// Per-thread ring buffer capacity: the configured total divided by the number of worker threads.
// A result of zero means no ring buffers are allocated at all.
// NOTE(review): this divides by g_numWorkerThreads without a visible guard here - presumably it is
// always >= 1 by the time threads start; confirm against the startup code.
 4123
f8f243b0 4124 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 4125 if(ringsize) {
f26bf547 4126 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
// When queries are distributed, t_remotes is sized per distributor thread rather than per worker.
4127 if(g_weDistributeQueries)
4128 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 4129 else
3ddb9247 4130 t_remotes->set_capacity(ringsize);
f26bf547 4131 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4132 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
4133 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4134 t_bogusremotes->set_capacity(ringsize);
f26bf547 4135 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4136 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 4137 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4138 t_timeouts->set_capacity(ringsize);
92011b8f 4139
f26bf547 4140 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4141 t_queryring->set_capacity(ringsize);
f26bf547 4142 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4143 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
4144 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4145 t_bogusqueryring->set_capacity(ringsize);
92011b8f 4146 }
3ddb9247 4147
// Create this thread's MTasker (constructed with the "stack-size" setting) and publish a raw,
// non-owning pointer to it in the thread's info slot; MT keeps ownership.
f26bf547 4148 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
144040be 4149 threadInfo.mt = MT.get();
3ddb9247 4150
63341e8d
RG
4151#ifdef HAVE_PROTOBUF
4152 /* start protobuf export threads if needed */
4153 auto luaconfsLocal = g_luaconfs.getLocal();
4154 checkProtobufExport(luaconfsLocal);
4155 checkOutgoingProtobufExport(luaconfsLocal);
4156#endif /* HAVE_PROTOBUF */
4157
bb4bdbaf
BH
// NOTE(review): pident is declared but not referenced again within this function.
4158 PacketID pident;
4159
4160 t_fdm=getMultiplexer();
d77abca1 4161
// Handler thread: optionally start the web server. Every other thread: listen on its two pipes and,
// if it is a listener, on the deferred sockets.
b243ca3b 4162 if(threadInfo.isHandler) {
d07bf7ff 4163 if(::arg().mustDo("webserver")) {
e6a9dde5 4164 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 4165 try {
// NOTE(review): the RecursorWebServer is allocated with new and the pointer is not stored here;
// presumably it is meant to live for the whole process - confirm this is intentional.
1ce57618 4166 new RecursorWebServer(t_fdm);
8989097d
CH
4167 }
4168 catch(PDNSException &e) {
e6a9dde5 4169 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
4170 exit(99);
4171 }
f3d1d67b 4172 }
377602e3 4173 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 4174 }
810ff705 4175 else {
d77abca1 4176
b243ca3b
RG
// Both pipe read ends are serviced by the same callback.
4177 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4178 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4179
4180 if (threadInfo.isListener) {
4181 if (g_reusePort) {
4182 /* then every listener has its own FDs */
// NOTE(review): `const auto deferred` copies each element; a reference would avoid the copy.
4183 for(const auto deferred : threadInfo.deferredAdds) {
4184 t_fdm->addReadFD(deferred.first, deferred.second);
4185 }
810ff705 4186 }
b243ca3b
RG
4187 else {
4188 /* otherwise all listeners are listening on the same ones */
4189 for(const auto deferred : g_deferredAdds) {
4190 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4191 }
4192 }
4193 }
810ff705 4194 }
3ddb9247 4195
b0b37121 4196 registerAllStats();
d77abca1 4197
b243ca3b 4198 if(threadInfo.isHandler) {
674cf0f6
BH
4199 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4200 }
1bc3c142 4201
f7c1d4e3 4202 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4203
// Tracks whether this thread's TCP listening sockets are currently registered with the multiplexer.
f7c1d4e3 4204 bool listenOnTCP(true);
49a699c4 4205
cb1523d1 4206 time_t last_stat = 0;
a2f87dd1 4207 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4208 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4209 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
// `counter` is declared outside this function; it is reset here and incremented once per loop pass.
ac0995bb 4210 counter.store(0); // used to periodically execute certain tasks
// Event loop - never left except by an exception.
f7c1d4e3 4211 for(;;) {
ac0e821b 4212 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4213
3427fa8a
BH
// Every 500th pass: start houseKeeping as an mthread.
4214 if(!(counter%500)) {
4215 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4216 }
4217
// Every 55th pass: ask the multiplexer for timed-out descriptors and stop reading from them.
d2392145 4218 if(!(counter%55)) {
d8f6d49f 4219 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4220 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4221
f7c1d4e3 4222 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
// The parameter stored with each expired descriptor is cast back to a TCPConnection (used for logging).
cd989c87 4223 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4224 if(g_logCommonErrors)
e6a9dde5 4225 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4226 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4227 }
4228 }
3ddb9247 4229
f7c1d4e3
BH
4230 counter++;
4231
// Handler-only periodic work: statistics output and the carbon dump.
b243ca3b 4232 if(threadInfo.isHandler) {
cb1523d1
RG
4233 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4234 doStats();
4235 last_stat = g_now.tv_sec;
4236 }
f7c1d4e3 4237
cb1523d1 4238 Utility::gettimeofday(&g_now, 0);
2c78bd57 4239
cb1523d1
RG
4240 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4241 MT->makeThread(doCarbonDump, 0);
4242 last_carbon = g_now.tv_sec;
4243 }
2c78bd57 4244 }
2a0276a9 4245 if (t_pdl != nullptr) {
9adbe790 4246 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
4247 /* remember that the listener threads handle TCP queries */
4248 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
4249 // Only on threads processing queries
4250 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4251 t_pdl->maintenance();
4252 last_lua_maintenance = g_now.tv_sec;
4253 }
9adbe790 4254 }
a2f87dd1 4255 }
2c78bd57 4256
bb4bdbaf 4257 t_fdm->run(&g_now);
3ea54bf0 4258 // 'run' updates g_now for us
f7c1d4e3 4259
// TCP admission control on listener threads: stop accepting while the current connection count
// exceeds max-tcp-clients, and start accepting again once it is back at or below the limit.
b243ca3b 4260 if(threadInfo.isListener) {
5c889cf5 4261 if(listenOnTCP) {
c47f201b
RG
4262 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4263 for(const auto fd : threadInfo.tcpSockets) {
4264 t_fdm->removeReadFD(fd);
b243ca3b 4265 }
c47f201b
RG
4266 listenOnTCP=false;
4267 }
f7c1d4e3 4268 }
5c889cf5 4269 else {
c47f201b
RG
4270 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4271 for(const auto fd : threadInfo.tcpSockets) {
4272 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4273 }
c47f201b
RG
4274 listenOnTCP=true;
4275 }
f7c1d4e3
BH
4276 }
4277 }
4278 }
4279}
// Function-level handlers: any exception escaping the body above is logged and ends this thread's
// function with a return value of 0.
3f81d239 4280catch(PDNSException &ae) {
e6a9dde5 4281 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4282 return 0;
4283}
4284catch(std::exception &e) {
e6a9dde5 4285 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4286 return 0;
4287}
// NOTE(review): the message below says "in main" although this handler belongs to recursorThread.
4288catch(...) {
e6a9dde5 4289 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4290 return 0;
4291}
4292
51e2144e 4293
3ddb9247 4294int main(int argc, char **argv)
288f4aa9 4295{
dbd23fc2
BH
4296 g_argc = argc;
4297 g_argv = argv;
5e3de507 4298 g_stats.startupTime=time(0);
b51ef4f9 4299 Utility::srandom();
3e135495 4300 versionSetProduct(ProductRecursor);
8a63d3ce 4301 reportBasicTypes();
0007c2e5 4302 reportOtherTypes();
ea634573 4303
22030c37 4304 int ret = EXIT_SUCCESS;
caa6eefa 4305
288f4aa9 4306 try {
f888311c 4307 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4308 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4309 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4310 ::arg().set("local-port","port to listen on")="53";
32252594 4311 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4312 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4313 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4314 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4315 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4316 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4317 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4318 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4319 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4320 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4321 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4322 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
4323 ::arg().set("chroot","switch to chroot jail")="";
4324 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4325 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 4326 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4327 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4328 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4329 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4330 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4331 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4332 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4333 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4334 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4335 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4336 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4337 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
8ca656a8 4338 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
cc08b5a9 4339 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4340 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4341 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4342 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4343 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4344
0ec489bf 4345 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4346 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4347 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4348 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4349 ::arg().set("socket-owner","Owner of socket")="";
4350 ::arg().set("socket-group","Group of socket")="";
4351 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4352
f0f3f0b0 4353 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
4354 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4355 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4356 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4357 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4358 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4359 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 4360 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4361 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
559b6c93
PL
4362 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4363 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
2e3d8a19 4364 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4365 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4366 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
b9473937 4367 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
c3e753c7 4368 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4369 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4370 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4371 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4372 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4373 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4374 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4375 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4376 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4377 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4378 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4379 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4380 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4381 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4382 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4383 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4384 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 4385
5605c067 4386 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4387 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4388 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4389 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4390 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4391 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4392 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4393 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4394 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4395 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4396 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4397 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
fd8898fb 4398 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
35695d18 4399 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
fd8898fb 4400 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5cf4b2e7 4401 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
ed9019c9 4402 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
3f975863 4403 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4404 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4405 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4406 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4407 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4408 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4409 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4410 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4411 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
4412 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4413 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 4414 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4415 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4416 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4417 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4418 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
c29d820c 4419 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
ee271fc4 4420 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
a09a8ce0 4421
68e6df3c 4422 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4423 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4424
4425 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4426
d705aad9 4427 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4428 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4429
72259676
RG
4430 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4431 for (size_t idx = 0; idx < 32; idx++) {
4432 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4433 }
4434 for (size_t idx = 0; idx < 128; idx++) {
4435 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4436 }
563517f3
RG
4437 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4438 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4439 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4440 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
d705aad9 4441
0735b17e 4442 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4443 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4444
8fd25133
RG
4445 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4446
98d36505
RG
4447 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4448
5cc8371b 4449 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4450 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4451
58da9034 4452 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4453 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4454 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4455 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 4456 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
144040be 4457 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
af1377b7
NC
4458#ifdef NOD_ENABLED
4459 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4460 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4461 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4462 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4463 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4464 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4465 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
4466 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4467 ::arg().set("unique-response-log", "Log unique responses")="yes";
4468 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4469 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4470 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 4471#endif /* NOD_ENABLED */
2e3d8a19 4472 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4473 ::arg().setCmd("version","Print version string");
d5141417 4474 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 4475 g_log.toConsole(Logger::Info);
2e3d8a19 4476 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4477
2d733c0f
CH
4478 string configname=::arg()["config-dir"]+"/recursor.conf";
4479 if(::arg()["config-name"]!="") {
4480 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4481 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4482 }
4483 cleanSlashes(configname);
5124de27 4484
5cc1ea1d
CH
4485 if(!::arg().getCommands().empty()) {
4486 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4487 exit(99);
4488 }
4489
577cf284
BH
4490 if(::arg().mustDo("config")) {
4491 cout<<::arg().configstring()<<endl;
4492 exit(0);
4493 }
4494
3ddb9247 4495 if(!::arg().file(configname.c_str()))
e6a9dde5 4496 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4497
2e3d8a19 4498 ::arg().parse(argc,argv);
c836dc19 4499
2054afbb
CH
4500 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4501 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4502 exit(EXIT_FAILURE);
4503 }
4504
4505 if (::arg()["socket-dir"].empty()) {
4506 if (::arg()["chroot"].empty())
4507 ::arg().set("socket-dir") = LOCALSTATEDIR;
4508 else
4509 ::arg().set("socket-dir") = "/";
4510 }
4511
2e3d8a19 4512 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4513
b243ca3b
RG
4514 if(::arg().asNum("threads")==1) {
4515 if (::arg().mustDo("pdns-distributes-queries")) {
4516 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4517 ::arg().set("pdns-distributes-queries")="no";
4518 }
4519 }
4520
4521 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4522 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4523 ::arg().set("distributor-threads")="1";
4524 }
4525
4526 if (!::arg().mustDo("pdns-distributes-queries")) {
4527 ::arg().set("distributor-threads")="0";
4528 }
61d74169 4529
2e3d8a19 4530 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4531 cout<<"syntax:"<<endl<<endl;
4532 cout<<::arg().helpstring(::arg()["help"])<<endl;
4533 exit(0);
b636533b 4534 }
5e3de507 4535 if(::arg().mustDo("version")) {
ba1a571d 4536 showProductVersion();
3613a51c 4537 showBuildConfiguration();
67076869 4538 exit(0);
5e3de507 4539 }
b636533b 4540
34162f8f 4541 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4542
34162f8f
CH
4543 if (logUrgency < Logger::Error)
4544 logUrgency = Logger::Error;
f48d7b65 4545 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4546 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4547 }
e6a9dde5
PL
4548 g_log.setLoglevel(logUrgency);
4549 g_log.toConsole(logUrgency);
34162f8f 4550
f7c1d4e3 4551 serviceMain(argc, argv);
288f4aa9 4552 }
3f81d239 4553 catch(PDNSException &ae) {
e6a9dde5 4554 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4555 ret=EXIT_FAILURE;
288f4aa9 4556 }
fdbf35ac 4557 catch(std::exception &e) {
e6a9dde5 4558 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4559 ret=EXIT_FAILURE;
288f4aa9
BH
4560 }
4561 catch(...) {
e6a9dde5 4562 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4563 ret=EXIT_FAILURE;
288f4aa9 4564 }
3ddb9247 4565
22030c37 4566 return ret;
288f4aa9 4567}