]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Account for the fact that udr and nod flags are optional in protobuf
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
ea634573
BH
70#include "dnsparser.hh"
71#include "dnswriter.hh"
72#include "dnsrecords.hh"
f814d7c8 73#include "zoneparser-tng.hh"
1d5b3ce6 74#include "rec_channel.hh"
aaacf7f2 75#include "logger.hh"
c8ddb7c2 76#include "iputils.hh"
09e6702a 77#include "mplexer.hh"
c038218b 78#include "config.h"
808c5ef7 79#include "lua-recursor4.hh"
ba1a571d 80#include "version.hh"
79332bff 81#include "responsestats.hh"
d67620e4 82#include "secpoll-recursor.hh"
c5c066bf 83#include "dnsname.hh"
644dd1da 84#include "filterpo.hh"
85#include "rpzloader.hh"
b3f0ed10 86#include "validate-recursor.hh"
f3c18728 87#include "rec-lua-conf.hh"
5c3b5e7f 88#include "ednsoptions.hh"
85c7ca75 89#include "gettime.hh"
af1377b7
NC
90#ifdef NOD_ENABLED
91#include "nod.hh"
92#endif /* NOD_ENABLED */
f3c18728 93
d9d3f9c1 94#include "rec-protobuf.hh"
d705aad9 95#include "rec-snmp.hh"
aa7929a3 96
6b6720de
PL
97#ifdef HAVE_SYSTEMD
98#include <systemd/sd-daemon.h>
99#endif
100
d187038c
RG
101#include "namespaces.hh"
102
5cc8371b
RG
103#include "xpf.hh"
104
d187038c
RG
105typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
106
f26bf547 107static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 108static thread_local unsigned int t_id = 0;
f26bf547
RG
109static thread_local std::shared_ptr<Regex> t_traceRegex;
110static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d
RG
111#ifdef HAVE_PROTOBUF
112static thread_local std::shared_ptr<RemoteLogger> t_protobufServer{nullptr};
113static thread_local std::shared_ptr<RemoteLogger> t_outgoingProtobufServer{nullptr};
114#endif /* HAVE_PROTOBUF */
f26bf547
RG
115
116thread_local std::unique_ptr<MT_t> MT; // the big MTasker
117thread_local std::unique_ptr<MemRecursorCache> t_RC;
118thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 119thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 120thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 121thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 122thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
aa7929a3 123#ifdef HAVE_PROTOBUF
f26bf547 124thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
aa7929a3 125#endif
af1377b7
NC
126#ifdef NOD_ENABLED
127thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 128thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 129#endif /* NOD_ENABLED */
d187038c 130__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 131
b243ca3b
RG
132typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
133
d7dae798 134// for communicating with our threads
b243ca3b
RG
135// effectively readonly after startup
136struct RecThreadInfo
137{
138 struct ThreadPipeSet
139 {
140 int writeToThread{-1};
141 int readToThread{-1};
142 int writeFromThread{-1};
143 int readFromThread{-1};
144 int writeQueriesToThread{-1}; // this one is non-blocking
145 int readQueriesToThread{-1};
146 };
147
adb6cd72 148 /* FD corresponding to TCP sockets this thread is listening
c47f201b 149 on.
adb6cd72
RG
150 These FDs are also in deferredAdds when we have one
151 socket per listener, and in g_deferredAdds instead. */
152 std::set<int> tcpSockets;
b243ca3b
RG
153 /* FD corresponding to listening sockets if we have one socket per
154 listener (with reuseport), otherwise all listeners share the
155 same FD and g_deferredAdds is then used instead */
156 deferredAdd_t deferredAdds;
157 struct ThreadPipeSet pipes;
158 std::thread thread;
159 /* handle the web server, carbon, statistics and the control channel */
160 bool isHandler{false};
161 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
162 bool isListener{false};
163 /* process queries */
164 bool isWorker{false};
49a699c4 165};
810ff705 166
b243ca3b
RG
167/* first we have the handler thread, t_id == 0 (some other
168 helper threads like SNMP might have t_id == 0 as well)
169 then the distributor threads if any
170 and finally the workers */
171static std::vector<RecThreadInfo> s_threadInfos;
172/* without reuseport, all listeners share the same sockets */
173static deferredAdd_t g_deferredAdds;
faf580f5 174
d187038c
RG
175typedef vector<int> tcpListenSockets_t;
176typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 177
d187038c 178static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 179static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
180static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
181static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
182static AtomicCounter counter;
9065eb05 183static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 184static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 185static NetmaskGroup g_XPFAcl;
d187038c 186static size_t g_tcpMaxQueriesPerConn;
a5886e6a 187static size_t s_maxUDPQueriesPerRound;
d187038c
RG
188static uint64_t g_latencyStatSize;
189static uint32_t g_disthashseed;
190static unsigned int g_maxTCPPerClient;
d187038c 191static unsigned int g_maxMThreads;
b243ca3b 192static unsigned int g_numDistributorThreads;
d187038c
RG
193static unsigned int g_numWorkerThreads;
194static int g_tcpTimeout;
195static uint16_t g_udpTruncationThreshold;
59cb4a79 196static uint16_t g_xpfRRCode{0};
d187038c
RG
197static std::atomic<bool> statsWanted;
198static std::atomic<bool> g_quiet;
199static bool g_logCommonErrors;
200static bool g_anyToTcp;
b243ca3b 201static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 202static bool g_reusePort{false};
00b8cadc 203static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 204static time_t g_statisticsInterval;
9065eb05 205static bool g_useIncomingECS;
a6f7f5fe 206std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
207#ifdef NOD_ENABLED
208static bool g_nodEnabled;
209static DNSName g_nodLookupDomain;
210static bool g_nodLog;
211static SuffixMatchNode g_nodDomainWL;
41c542ec
NC
212static bool g_udrEnabled;
213static bool g_udrLog;
af1377b7 214#endif /* NOD_ENABLED */
f097141b 215#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 216static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
217#else
218static std::set<uint16_t> s_avoidUdpSourcePorts;
219#endif
bf6f28ca
CHB
220static uint16_t s_minUdpSourcePort;
221static uint16_t s_maxUdpSourcePort;
49a699c4 222
b243ca3b 223RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 224RecursorStats g_stats;
2d733c0f 225string s_programname="pdns_recursor";
d187038c 226string s_pidfname;
c1c29961 227bool g_lowercaseOutgoing;
bf19ccfd 228unsigned int g_networkTimeoutMsec;
d187038c
RG
229unsigned int g_numThreads;
230uint16_t g_outgoingEDNSBufsize;
98d36505 231bool g_logRPZChanges{false};
c3828c03 232
12cd44ee 233#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 234#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 235// Bad Nets taken from both:
3ddb9247 236// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 237// and
238// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
239// where such a network may not be considered a valid destination
240#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
241#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 242
d7dae798 243//! used to send information to a newborn mthread
ea634573 244struct DNSComboWriter {
78f56b38 245 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now)
2749c3fe
RG
246 {
247 }
5cc8371b 248
78f56b38 249 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
250 {
251 }
252
5cc8371b
RG
253 void setRemote(const ComboAddress& sa)
254 {
255 d_remote=sa;
256 }
257
258 void setSource(const ComboAddress& sa)
ea634573 259 {
5cc8371b 260 d_source=sa;
ea634573
BH
261 }
262
b71b60ee 263 void setLocal(const ComboAddress& sa)
264 {
265 d_local=sa;
266 }
267
5cc8371b
RG
268 void setDestination(const ComboAddress& sa)
269 {
270 d_destination=sa;
271 }
b71b60ee 272
ea634573
BH
273 void setSocket(int sock)
274 {
275 d_socket=sock;
276 }
a1754c6a
BH
277
278 string getRemote() const
279 {
5cc8371b
RG
280 if (d_source == d_remote) {
281 return d_source.toStringWithPort();
282 }
283 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
284 }
285
5cc8371b 286 MOADNSParser d_mdp;
c9e9e5e0 287 struct timeval d_now;
5cc8371b
RG
288 /* Remote client, might differ from d_source
289 in case of XPF, in which case d_source holds
290 the IP of the client and d_remote of the proxy
291 */
292 ComboAddress d_remote;
293 ComboAddress d_source;
294 /* Destination address, might differ from
295 d_destination in case of XPF, in which case
296 d_destination holds the IP of the proxy and
297 d_local holds our own. */
298 ComboAddress d_local;
299 ComboAddress d_destination;
aa7929a3
RG
300#ifdef HAVE_PROTOBUF
301 boost::uuids::uuid d_uuid;
67e31ebe 302 string d_requestorId;
590388d2 303 string d_deviceId;
aa7929a3 304#endif
5164bac3
RG
305 std::vector<std::string> d_policyTags;
306 LuaContext::LuaObject d_data;
b40562da 307 EDNSSubnetOpts d_ednssubnet;
5164bac3 308 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 309 int d_socket;
b673817a 310 unsigned int d_tag{0};
e9f63d47 311 uint32_t d_qhash{0};
70fb28d9
RG
312 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
313 bool d_variable{false};
5164bac3
RG
314 bool d_ecsFound{false};
315 bool d_ecsParsed{false};
316 bool d_tcp;
ea634573
BH
317};
318
06857845
RG
319MT_t* getMT()
320{
321 return MT ? MT.get() : nullptr;
322}
ea634573 323
288f4aa9
BH
324ArgvMap &arg()
325{
326 static ArgvMap theArg;
327 return theArg;
328}
4ef015cd 329
8fb594ba 330unsigned int getRecursorThreadId()
b4015453 331{
30da2030 332 return t_id;
b4015453 333}
09e6702a 334
30ee601a
RG
335int getMTaskerTID()
336{
337 return MT->getTid();
338}
339
b243ca3b
RG
340static bool isDistributorThread()
341{
342 if (t_id == 0) {
343 return false;
344 }
345
346 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
347}
348
349static bool isHandlerThread()
350{
351 if (t_id == 0) {
352 return true;
353 }
354
355 return s_threadInfos.at(t_id).isHandler;
356}
357
d187038c 358static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 359
50c81227 360// -1 is error, 0 is timeout, 1 is success
3ddb9247 361int asendtcp(const string& data, Socket* sock)
5c633640
BH
362{
363 PacketID pident;
364 pident.sock=sock;
365 pident.outMSG=data;
3ddb9247 366
bb4bdbaf 367 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 368 string packet;
5c633640 369
5b0ddd18 370 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 371
9170fbaf 372 if(!ret || ret==-1) { // timeout
bb4bdbaf 373 t_fdm->removeWriteFD(sock->getHandle());
5c633640 374 }
50c81227
BH
375 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
376 return -1;
377 }
9170fbaf 378 return ret;
5c633640
BH
379}
380
d187038c 381static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 382
9170fbaf 383// -1 is error, 0 is timeout, 1 is success
a683e8bd 384int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 385{
50c81227 386 data.clear();
5c633640
BH
387 PacketID pident;
388 pident.sock=sock;
389 pident.inNeeded=len;
825fa717 390 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 391 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 392
bb4bdbaf 393 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 394 if(!ret || ret==-1) { // timeout
bb4bdbaf 395 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 396 }
50c81227
BH
397 else if(data.empty()) {// error, EOF or other
398 return -1;
399 }
400
9170fbaf 401 return ret;
288f4aa9
BH
402}
403
d187038c 404static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 405{
fba1e944 406 PacketID pident=*any_cast<PacketID>(&var);
4465e941 407 char resp[512];
7c77ce63
RG
408 ComboAddress fromaddr;
409 socklen_t addrlen=sizeof(fromaddr);
410
411 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
412 if (fromaddr != pident.remote) {
e6a9dde5 413 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
414
415 }
416
4465e941 417 t_fdm->removeReadFD(fd);
418 if(ret >= 0) {
a683e8bd 419 string data(resp, (size_t) ret);
fba1e944 420 MT->sendEvent(pident, &data);
4465e941 421 }
422 else {
fba1e944 423 string empty;
424 MT->sendEvent(pident, &empty);
425 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 426 }
427}
fba1e944 428string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 429{
4465e941 430 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
431 s.setNonBlocking();
432 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
433
434 s.bind(local);
435 s.connect(dest);
4465e941 436 s.send(query);
437
438 PacketID pident;
439 pident.sock=&s;
7c77ce63 440 pident.remote=dest;
4465e941 441 pident.type=0;
fba1e944 442 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 443
444 string data;
fba1e944 445
4465e941 446 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 447
4465e941 448 if(!ret || ret==-1) { // timeout
4465e941 449 t_fdm->removeReadFD(s.getHandle());
450 }
451 else if(data.empty()) {// error, EOF or other
fba1e944 452 // we could special case this
4465e941 453 return data;
454 }
4465e941 455 return data;
456}
457
d7dae798 458//! pick a random query local address
1652a63e 459ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 460{
1652a63e 461 ComboAddress ret;
5a38281c 462 if(family==AF_INET) {
3ddb9247 463 if(g_localQueryAddresses4.empty())
1652a63e 464 ret = g_local4;
3ddb9247 465 else
1652a63e
BH
466 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
467 ret.sin4.sin_port = htons(port);
5a38281c
BH
468 }
469 else {
470 if(g_localQueryAddresses6.empty())
1652a63e
BH
471 ret = g_local6;
472 else
473 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 474
1652a63e 475 ret.sin6.sin6_port = htons(port);
5a38281c 476 }
1652a63e 477 return ret;
5a38281c 478}
4ef015cd 479
d187038c 480static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 481
d187038c 482static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
483{
484 uint32_t psize=0;
485 socklen_t len=sizeof(psize);
3ddb9247 486
d7dae798 487 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 488 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 489 return;
d7dae798
BH
490 }
491
492 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 493 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
494}
495
496
497static void setSocketReceiveBuffer(int fd, uint32_t size)
498{
499 setSocketBuffer(fd, SO_RCVBUF, size);
500}
501
502static void setSocketSendBuffer(int fd, uint32_t size)
503{
504 setSocketBuffer(fd, SO_SNDBUF, size);
505}
506
507
4ef015cd
BH
508// you can ask this class for a UDP socket to send a query from
509// this socket is not yours, don't even think about deleting it
510// but after you call 'returnSocket' on it, don't assume anything anymore
511class UDPClientSocks
512{
4ef015cd 513 unsigned int d_numsocks;
4ef015cd 514public:
e2642526 515 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
516 {
517 }
518
996c89cc 519 typedef set<int> socks_t;
4ef015cd
BH
520 socks_t d_socks;
521
2ee280cf 522 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 523 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 524 {
d8f6d49f
BH
525 *fd=makeClientSocket(toaddr.sin4.sin_family);
526 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 527 return -2;
d8f6d49f
BH
528
529 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
530 int err = errno;
41ff43f8 531 // returnSocket(*fd);
a7b68ae7
RG
532 try {
533 closesocket(*fd);
534 }
535 catch(const PDNSException& e) {
e6a9dde5 536 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
537 }
538
d8f6d49f 539 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 540 return -2;
998a4334 541 return -1;
d8f6d49f 542 }
998a4334 543
d8f6d49f 544 d_socks.insert(*fd);
998a4334 545 d_numsocks++;
d8f6d49f 546 return 0;
4ef015cd
BH
547 }
548
095c3045
BH
549 void returnSocket(int fd)
550 {
551 socks_t::iterator i=d_socks.find(fd);
34801ab1 552 if(i==d_socks.end()) {
335da0ba 553 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 554 }
bb4bdbaf 555 returnSocketLocked(i);
095c3045
BH
556 }
557
4ef015cd 558 // return a socket to the pool, or simply erase it
bb4bdbaf 559 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 560 {
600fc20b 561 if(i==d_socks.end()) {
3f81d239 562 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 563 }
80baf329 564 try {
bb4bdbaf 565 t_fdm->removeReadFD(*i);
80baf329
BH
566 }
567 catch(FDMultiplexerException& e) {
bb4bdbaf 568 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 569 }
a7b68ae7
RG
570 try {
571 closesocket(*i);
572 }
573 catch(const PDNSException& e) {
e6a9dde5 574 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 575 }
3ddb9247 576
998a4334
BH
577 d_socks.erase(i++);
578 --d_numsocks;
4ef015cd 579 }
d8f6d49f
BH
580
581 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 582 static int makeClientSocket(int family)
d8f6d49f 583 {
a683e8bd 584 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 585
d8f6d49f
BH
586 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
587 return ret;
3ddb9247
PD
588
589 if(ret<0)
335da0ba 590 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 591
7eb73ffa 592 // setCloseOnExec(ret); // we're not going to exec
5a38281c 593
d8f6d49f 594 int tries=10;
3aa91c3e 595 ComboAddress sin;
d8f6d49f 596 while(--tries) {
1652a63e 597 uint16_t port;
3ddb9247 598
d8f6d49f 599 if(tries==1) // fall back to kernel 'random'
4957a608 600 port = 0;
bf6f28ca
CHB
601 else {
602 do {
603 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
604 }
605 while (s_avoidUdpSourcePorts.count(port));
606 }
5a38281c 607
3aa91c3e 608 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 609
3ddb9247 610 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 611 break;
d8f6d49f
BH
612 }
613 if(!tries)
3aa91c3e 614 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 615
3897b9e1 616 setNonBlocking(ret);
d8f6d49f
BH
617 return ret;
618 }
49a699c4
BH
619};
620
f26bf547 621static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 622
288f4aa9 623/* these two functions are used by LWRes */
34801ab1 624// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 625int asendto(const char *data, size_t len, int flags,
3ddb9247 626 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 627{
34801ab1
BH
628
629 PacketID pident;
787e5eab
BH
630 pident.domain = domain;
631 pident.remote = toaddr;
632 pident.type = qtype;
34801ab1
BH
633
634 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
635 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
636
637 for(; chain.first != chain.second; chain.first++) {
638 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 639 /*
4665c31e
BH
640 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
641 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 642 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 643 */
34801ab1
BH
644 chain.first->key.chain.insert(id); // we can chain
645 *fd=-1; // gets used in waitEvent / sendEvent later on
646 return 1;
647 }
648 }
649
49a699c4 650 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
651 if(ret < 0)
652 return ret;
34801ab1 653
998a4334
BH
654 pident.fd=*fd;
655 pident.id=id;
3ddb9247 656
bb4bdbaf
BH
657 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
658 ret = send(*fd, data, len, 0);
659
5b0ddd18 660 int tmp = errno;
bb4bdbaf 661
7302ed0a 662 if(ret < 0)
49a699c4 663 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 664
5b0ddd18 665 errno = tmp; // this is for logging purposes only
7302ed0a 666 return ret;
288f4aa9
BH
667}
668
9170fbaf 669// -1 is error, 0 is timeout, 1 is success
f128d20d 670int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 671 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 672{
0d5f0a9f 673 static optional<unsigned int> nearMissLimit;
3ddb9247 674 if(!nearMissLimit)
0d5f0a9f
BH
675 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
676
288f4aa9 677 PacketID pident;
4ef015cd 678 pident.fd=fd;
288f4aa9 679 pident.id=id;
0d5f0a9f 680 pident.domain=domain;
787e5eab 681 pident.type = qtype;
996c89cc 682 pident.remote=fromaddr;
b636533b 683
5b0ddd18 684 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 685
9170fbaf 686 if(ret > 0) {
996c89cc 687 if(packet.empty()) // means "error"
3ddb9247 688 return -1;
998a4334 689
a683e8bd 690 *d_len=packet.size();
f128d20d 691
0d5f0a9f 692 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 693 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 694 g_stats.spoofCount++;
35ce8576
BH
695 return -1;
696 }
288f4aa9 697 }
09e6702a 698 else {
34801ab1 699 if(fd >= 0)
49a699c4 700 t_udpclientsocks->returnSocket(fd);
09e6702a 701 }
9170fbaf 702 return ret;
288f4aa9
BH
703}
704
88def049
BH
705static void writePid(void)
706{
191f2e47 707 if(!::arg().mustDo("write-pid"))
708 return;
18e7758c 709 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 710 if(of)
705f31ae 711 of<< Utility::getpid() <<endl;
88def049 712 else
e6a9dde5 713 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
714}
715
2749c3fe 716TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
717{
718 ++s_currentConnections;
cd989c87 719 (*t_tcpClientCounts)[d_remote]++;
0e408828 720}
cd989c87
BH
721
722TCPConnection::~TCPConnection()
0e408828 723{
a7b68ae7
RG
724 try {
725 if(closesocket(d_fd) < 0)
e6a9dde5 726 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
727 }
728 catch(const PDNSException& e) {
e6a9dde5 729 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
730 }
731
3ddb9247 732 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 733 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 734 --s_currentConnections;
0e408828 735}
0e9d9ce2 736
3ddb9247 737AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
738
739static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 740
92011b8f 741// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 742static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 743{
92011b8f 744 if(packetsize > 1000 && t_largeanswerremotes)
745 t_largeanswerremotes->push_back(remote);
2cc13433
BH
746 switch(res) {
747 case RCode::ServFail:
92011b8f 748 if(t_servfailremotes) {
749 t_servfailremotes->push_back(remote);
5af86fdc 750 if(query && t_servfailqueryring) // packet cache
92011b8f 751 t_servfailqueryring->push_back(make_pair(*query, qtype));
752 }
2cc13433
BH
753 g_stats.servFails++;
754 break;
755 case RCode::NXDomain:
756 g_stats.nxDomains++;
757 break;
758 case RCode::NoError:
759 g_stats.noErrors++;
760 break;
761 }
762}
763
5164bac3 764static string makeLoginfo(const DNSComboWriter* dc)
a903b39c 765try
766{
5cc8371b 767 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 768}
769catch(...)
770{
771 return "Exception making error message for exception";
772}
773
aa7929a3 774#ifdef HAVE_PROTOBUF
590388d2 775static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 776{
e1c8a4bb
RG
777 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
778 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
779 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 780 message.setServerIdentity(SyncRes::s_serverID);
a94bc5d7 781 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 782 message.setRequestorId(requestorId);
590388d2 783 message.setDeviceId(deviceId);
02b47f43 784
02b47f43 785 if (!policyTags.empty()) {
d9d3f9c1 786 message.setPolicyTags(policyTags);
02b47f43 787 }
aa7929a3 788
d9d3f9c1 789// cerr <<message.toDebugString()<<endl;
aa7929a3 790 std::string str;
d9d3f9c1 791 message.serialize(str);
aa7929a3 792 logger->queueData(str);
aa7929a3
RG
793}
794
d9d3f9c1 795static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
aa7929a3 796{
d9d3f9c1 797// cerr <<message.toDebugString()<<endl;
aa7929a3 798 std::string str;
d9d3f9c1 799 message.serialize(str);
aa7929a3 800 logger->queueData(str);
aa7929a3
RG
801}
802#endif
803
53508135
PL
804/**
805 * Chases the CNAME provided by the PolicyCustom RPZ policy.
806 *
807 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
808 * @param qtype: The QType of the original query
809 * @param sr: A SyncRes
810 * @param res: An integer that will contain the RCODE of the lookup we do
811 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
812 */
d187038c 813static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
814{
815 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
816 bool oldWantsRPZ = sr.getWantsRPZ();
817 sr.setWantsRPZ(false);
53508135
PL
818 vector<DNSRecord> ans;
819 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
820 for (const auto& rec : ans) {
821 if(rec.d_place == DNSResourceRecord::ANSWER) {
822 ret.push_back(rec);
823 }
824 }
825 // Reset the RPZ state of the SyncRes
30ee601a 826 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
827 }
828}
829
70fb28d9 830static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 831{
70fb28d9 832 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
833
834 if(rec.d_type != QType::OPT) // their TTL ain't real
835 minTTL = min(minTTL, rec.d_ttl);
836
837 rec.d_content->toPacket(pw);
838 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
839 pw.rollback();
840 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
841 pw.getHeader()->tc=1;
842 pw.truncate();
843 }
844 return false;
845 }
846
847 return true;
848}
849
63341e8d
RG
850#ifdef HAVE_PROTOBUF
851static std::shared_ptr<RemoteLogger> startProtobufServer(const ProtobufExportConfig& config, uint64_t generation)
852{
853 std::shared_ptr<RemoteLogger> result = nullptr;
854 try {
855 result = std::make_shared<RemoteLogger>(config.server, config.timeout, config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
856 result->setGeneration(generation);
857 }
858 catch(const std::exception& e) {
859 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<config.server<<": "<<e.what()<<endl;
860 }
861 catch(const PDNSException& e) {
862 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<config.server<<": "<<e.reason<<endl;
863 }
864
865 return result;
866}
867
868static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
869{
870 if (!luaconfsLocal->protobufExportConfig.enabled) {
871 if (t_protobufServer != nullptr) {
872 t_protobufServer->stop();
873 t_protobufServer = nullptr;
874 }
875
876 return false;
877 }
878
879 /* if the server was not running, or if it was running according to a
880 previous configuration */
881 if (t_protobufServer == nullptr ||
882 t_protobufServer->getGeneration() < luaconfsLocal->generation) {
883
884 if (t_protobufServer) {
885 t_protobufServer->stop();
886 }
887
888 t_protobufServer = startProtobufServer(luaconfsLocal->protobufExportConfig, luaconfsLocal->generation);
889 }
890
891 return true;
892}
893
894static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
895{
896 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
897 if (t_outgoingProtobufServer != nullptr) {
898 t_outgoingProtobufServer->stop();
899 t_outgoingProtobufServer = nullptr;
900 }
901
902 return false;
903 }
904
905 /* if the server was not running, or if it was running according to a
906 previous configuration */
907 if (t_outgoingProtobufServer == nullptr ||
908 t_outgoingProtobufServer->getGeneration() < luaconfsLocal->generation) {
909
910 if (t_outgoingProtobufServer) {
911 t_outgoingProtobufServer->stop();
912 }
913
914 t_outgoingProtobufServer = startProtobufServer(luaconfsLocal->outgoingProtobufExportConfig, luaconfsLocal->generation);
915 }
916
917 return true;
918}
919#endif /* HAVE_PROTOBUF */
920
af1377b7 921#ifdef NOD_ENABLED
41c542ec 922static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
923{
924 static const QType qt(QType::A);
925 static const uint16_t qc(QClass::IN);
41c542ec 926 bool ret = false;
af1377b7
NC
927 // First check the (sub)domain isn't whitelisted for NOD purposes
928 if (!g_nodDomainWL.check(dname)) {
929 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
930 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
931 if (g_nodLog) {
932 // This should probably log to a dedicated log file
933 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
934 }
935 if (!(g_nodLookupDomain.isRoot())) {
936 // Send a DNS A query to <domain>.g_nodLookupDomain
937 DNSName qname = dname;
938 vector<DNSRecord> dummy;
939 qname += g_nodLookupDomain;
940 directResolve(qname, qt, qc, dummy);
941 }
41c542ec 942 ret = true;
af1377b7
NC
943 }
944 }
41c542ec 945 return ret;
af1377b7
NC
946}
947
948static void nodAddDomain(const DNSName& dname)
949{
950 // Don't bother adding domains on the nod whitelist
951 if (!g_nodDomainWL.check(dname)) {
952 if (t_nodDBp) {
953 // This keeps the nod info up to date
954 t_nodDBp->addDomain(dname);
955 }
956 }
957}
41c542ec
NC
958
959static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
960{
961 bool ret = false;
962 if (record.d_place == DNSResourceRecord::ANSWER ||
963 record.d_place == DNSResourceRecord::ADDITIONAL) {
964 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
965 std::stringstream ss;
966 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
967 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
968 if (g_udrLog) {
969 // This should also probably log to a dedicated file.
970 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
971 }
972 ret = true;
973 }
974 }
975 return ret;
976}
af1377b7
NC
977#endif /* NOD_ENABLED */
978
// startDoResolve: answers one client question.
//
// p is cast to a DNSComboWriter* (dc) which describes the parsed query and
// where it came from. dc is deleted on every path visible in this function:
// at the end of normal processing, on every policy "Drop", and in each catch
// handler.
//
// Rough outline, in order:
//  - size the answer budget: 65535 for TCP; for UDP 512, or the EDNS
//    advertised size (never less than 512), capped by g_udpTruncationThreshold.
//    When the query has EDNS, 11 bytes are reserved for the OPT header and
//    further room for a returned NSID option;
//  - set up the response header, the SyncRes and the Lua DNSQuestion (dq);
//  - early exits to 'sendit' for a bad EDNS version and for ANY over UDP when
//    g_anyToTcp is set;
//  - Lua prerpz hook, RPZ query policy, Lua preresolve hook, the actual
//    resolve, RPZ handling of the result, Lua nodata/nxdomain/postresolve;
//  - 'haveAnswer': drop handling, trace logging, DNSSEC validation state
//    handling, packing of the records into the packet;
//  - 'sendit': OPT record, statistics, protobuf logging, sending over UDP
//    (plus packet cache insertion) or TCP, latency accounting.
d187038c 979static void startDoResolve(void *p)
288f4aa9 980{
7b1469bb 981 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 982 try {
5af86fdc
RG
983 if (t_queryring)
984 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 985
// Default budget when the query carries no EDNS: full size for TCP, at most 512 for UDP
32015748 986 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 987 EDNSOpts edo;
5164bac3 988 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 989 bool variableAnswer = dc->d_variable;
8e079f3a 990 bool haveEDNS=false;
f1db0de2
PL
991 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
992 uint8_t ednsExtRCode = 0;
8e079f3a 993 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
994 haveEDNS=true;
995 if (edo.d_version != 0) {
996 ednsExtRCode = ERCode::BADVERS;
997 }
998
32015748
RG
999 if(!dc->d_tcp) {
1000 /* rfc6891 6.2.3:
1001 "Values lower than 512 MUST be treated as equal to 512."
1002 */
1003 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1004 }
5164bac3 1005 ednsOpts = edo.d_options;
8e079f3a 1006 haveEDNS=true;
3af35968 1007 maxanswersize -= 11; // EDNS header size
b40562da 1008
// Walk the EDNS options of the query: ECS is parsed (once) when g_useIncomingECS is set,
// NSID is answered with the configured server-id if it is enabled and still fits
1f691b94
PL
1009 for (const auto& o : edo.d_options) {
1010 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1011 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1012 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1013 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1014 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1015 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1016 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1017 variableAnswer = true; // Can't packetcache an answer with NSID
1018 // Option Code and Option Length are both 2
1019 maxanswersize -= 2 + 2 + mode_server_id.size();
1020 }
b40562da
RG
1021 }
1022 }
10321a98 1023 }
b40562da
RG
1024 /* perhaps there was no EDNS or no ECS but by now we looked */
1025 dc->d_ecsParsed = true;
e325f20c 1026 vector<DNSRecord> ret;
ea634573 1027 vector<uint8_t> packet;
b23b8614 1028
ad42489c 1029 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1030 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1031 bool wantsRPZ(true);
1fbc6dc5 1032 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1033 bool logResponse = false;
aa7929a3 1034#ifdef HAVE_PROTOBUF
// When protobuf export is enabled, prepare the response message up front; the requestor
// address is masked with the configured v4/v6 mask before it goes into the message
63341e8d 1035 if (checkProtobufExport(luaconfsLocal)) {
f1c7929a 1036 logResponse = t_protobufServer && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1037 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1038 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
0bd2e252 1039 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1040 pbMessage->setServerIdentity(SyncRes::s_serverID);
d362f7c1 1041 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1042 }
1043#endif /* HAVE_PROTOBUF */
ad42489c 1044
3ddb9247 1045 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1046
1047 pw.getHeader()->aa=0;
1048 pw.getHeader()->ra=1;
c154c8a4 1049 pw.getHeader()->qr=1;
bb4bdbaf 1050 pw.getHeader()->tc=0;
ea634573 1051 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1052 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1053 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1054
70fb28d9
RG
1055 /* This is the lowest TTL seen in the records of the response,
1056 so we can't cache it for longer than this value.
1057 If we have a TTL cap, this value can't be larger than the
1058 cap no matter what. */
1059 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1060
1061 SyncRes sr(dc->d_now);
0c43f455 1062
2e921ec6 1063 bool DNSSECOK=false;
3457a2a0 1064 if(t_pdl) {
f26bf547 1065 sr.setLuaEngine(t_pdl);
3457a2a0 1066 }
9eec8c98 1067 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1068 sr.setDoDNSSEC(true);
9eec8c98
PL
1069
1070 // Does the requestor want DNSSEC records?
d6c335ab 1071 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1072 DNSSECOK=true;
1073 g_stats.dnssecQueries++;
1074 }
1075 } else {
1076 // Ignore the client-set CD flag
1077 pw.getHeader()->cd=0;
5b9853c9 1078 }
0c43f455
RG
1079 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1080
4898a348 1081#ifdef HAVE_PROTOBUF
30ee601a 1082 sr.setInitialRequestId(dc->d_uuid);
63341e8d 1083 sr.setOutgoingProtobufServer(t_outgoingProtobufServer);
4898a348 1084#endif
0c43f455 1085
2fe3354d 1086 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13
904d3219 1088 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1089 bool shouldNotValidate = false;
904d3219 1090
ef3b6cd7
RG
1091 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1092 int res = RCode::NoError;
1f1ca368 1093 DNSFilterEngine::Policy appliedPolicy;
39ec5d29 1094 DNSRecord spoofed;
// dq is the view of this query handed to the Lua hooks; NOTE(review): variableAnswer, wantsRPZ
// and logResponse appear to be passed so the hooks can change them - confirm against the
// DNSQuestion constructor
f1c7929a 1095 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1096 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1097 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1098 dq.tag = dc->d_tag;
1099 dq.discardedPolicies = &sr.d_discardedPolicies;
1100 dq.policyTags = &dc->d_policyTags;
1101 dq.appliedPolicy = &appliedPolicy;
1102 dq.currentRecords = &ret;
1103 dq.dh = &dc->d_mdp.d_header;
05c74122 1104 dq.data = dc->d_data;
67e31ebe
RG
1105#ifdef HAVE_PROTOBUF
1106 dq.requestorId = dc->d_requestorId;
590388d2 1107 dq.deviceId = dc->d_deviceId;
67e31ebe 1108#endif
ba21fcfe 1109
// A non-zero extended RCODE (BADVERS, set above for an EDNS version other than 0)
// skips resolution altogether
6cf96227
PL
1110 if(ednsExtRCode != 0) {
1111 goto sendit;
1112 }
1113
// ANY over UDP while g_anyToTcp is set: answer with just the TC bit, and never cache that
e661a20b 1114 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1115 pw.getHeader()->tc = 1;
1116 res = 0;
1117 variableAnswer = true;
e661a20b
PD
1118 goto sendit;
1119 }
1120
f26bf547 1121 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1122 sr.setLogMode(SyncRes::Store);
1123 tracedQuery=true;
1124 }
3ddb9247 1125
8f7473d7 1126
976ec823 1127 if(!g_quiet || tracedQuery) {
e6a9dde5 1128 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1129 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1130 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1131 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1132 }
e6a9dde5 1133 g_log<<endl;
976ec823 1134 }
c75a6a9e 1135
fededf47 1136 sr.setId(MT->getTid());
67828389 1137 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1138 sr.setCacheOnly();
1139
f26bf547
RG
1140 if (t_pdl) {
1141 t_pdl->prerpz(dq, res);
0a273054
RG
1142 }
1143
db486de5 1144 // Check if the query has a policy attached to it
0a273054 1145 if (wantsRPZ) {
5cc8371b 1146 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1147 }
644dd1da 1148
54be222b 1149 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1150 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1151
30ee601a 1152 sr.setWantsRPZ(wantsRPZ);
// First policy pass: act on the policy found for the query name/source before resolving.
// Unlike the two later passes, ret is still empty here so nothing needs clearing.
b8470add
PL
1153 if(wantsRPZ) {
1154 switch(appliedPolicy.d_kind) {
1155 case DNSFilterEngine::PolicyKind::NoAction:
1156 break;
1157 case DNSFilterEngine::PolicyKind::Drop:
1158 g_stats.policyDrops++;
7a25883a 1159 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1160 delete dc;
1161 dc=0;
1162 return;
1163 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1164 g_stats.policyResults[appliedPolicy.d_kind]++;
1165 res=RCode::NXDomain;
1166 goto haveAnswer;
1167 case DNSFilterEngine::PolicyKind::NODATA:
1168 g_stats.policyResults[appliedPolicy.d_kind]++;
1169 res=RCode::NoError;
db486de5 1170 goto haveAnswer;
b8470add
PL
1171 case DNSFilterEngine::PolicyKind::Custom:
1172 g_stats.policyResults[appliedPolicy.d_kind]++;
1173 res=RCode::NoError;
a9e029ee 1174 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1175 ret.push_back(spoofed);
53508135 1176 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1177 goto haveAnswer;
1178 case DNSFilterEngine::PolicyKind::Truncate:
1179 if(!dc->d_tcp) {
1180 g_stats.policyResults[appliedPolicy.d_kind]++;
1181 res=RCode::NoError;
1182 pw.getHeader()->tc=1;
1183 goto haveAnswer;
1184 }
1185 break;
1186 }
db486de5
PL
1187 }
1188
b8470add 1189 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1190 try {
1191 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1192 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1193 }
1194 catch(ImmediateServFailException &e) {
854d44e3 1195 if(g_logCommonErrors)
e6a9dde5 1196 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1197 res = RCode::ServFail;
1198 }
4485aa35 1199
1921a4c2
RG
1200 dq.validationState = sr.getValidationState();
1201
b8470add
PL
1202 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1203 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1204 appliedPolicy = sr.d_appliedPolicy;
1205 g_stats.policyResults[appliedPolicy.d_kind]++;
1206 switch(appliedPolicy.d_kind) {
1207 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1208 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1209 case DNSFilterEngine::PolicyKind::Drop:
1210 g_stats.policyDrops++;
1211 delete dc;
1212 dc=0;
1213 return;
1214 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1215 ret.clear();
1216 res=RCode::NXDomain;
1217 goto haveAnswer;
1218
1219 case DNSFilterEngine::PolicyKind::NODATA:
1220 ret.clear();
1221 res=RCode::NoError;
1222 goto haveAnswer;
1223
1224 case DNSFilterEngine::PolicyKind::Truncate:
1225 if(!dc->d_tcp) {
1226 ret.clear();
1227 res=RCode::NoError;
1228 pw.getHeader()->tc=1;
1229 goto haveAnswer;
1230 }
1231 break;
1232
1233 case DNSFilterEngine::PolicyKind::Custom:
1234 ret.clear();
1235 res=RCode::NoError;
a9e029ee 1236 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1237 ret.push_back(spoofed);
53508135 1238 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1239 goto haveAnswer;
1240 }
1241 }
1242
// Third policy source: a policy matching the records that came back
1243 if (wantsRPZ) {
1f1ca368 1244 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1245 }
db486de5 1246
// Lua hooks on the result: nodata (NoError without a record of the asked type in ANSWER),
// nxdomain, then postresolve. Any hook returning true disables validation of the answer.
f26bf547 1247 if(t_pdl) {
db486de5
PL
1248 if(res == RCode::NoError) {
1249 auto i=ret.cbegin();
1250 for(; i!= ret.cend(); ++i)
1251 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1252 break;
f26bf547 1253 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1254 shouldNotValidate = true;
1255
db486de5 1256 }
f26bf547 1257 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1258 shouldNotValidate = true;
db486de5 1259
f26bf547 1260 if(t_pdl->postresolve(dq, res))
3ca4e735 1261 shouldNotValidate = true;
db486de5
PL
1262 }
1263
b8470add
PL
1264 if (wantsRPZ) { //XXX This block is repeated, see above
1265 g_stats.policyResults[appliedPolicy.d_kind]++;
1266 switch(appliedPolicy.d_kind) {
1267 case DNSFilterEngine::PolicyKind::NoAction:
1268 break;
1269 case DNSFilterEngine::PolicyKind::Drop:
1270 g_stats.policyDrops++;
1271 delete dc;
1272 dc=0;
1273 return;
1274 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1275 ret.clear();
1276 res=RCode::NXDomain;
1277 goto haveAnswer;
1278
1279 case DNSFilterEngine::PolicyKind::NODATA:
1280 ret.clear();
1281 res=RCode::NoError;
1282 goto haveAnswer;
1283
1284 case DNSFilterEngine::PolicyKind::Truncate:
1285 if(!dc->d_tcp) {
1286 ret.clear();
1287 res=RCode::NoError;
1288 pw.getHeader()->tc=1;
1289 goto haveAnswer;
1290 }
1291 break;
1292
1293 case DNSFilterEngine::PolicyKind::Custom:
1294 ret.clear();
1295 res=RCode::NoError;
a9e029ee 1296 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1297 ret.push_back(spoofed);
53508135 1298 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1299 goto haveAnswer;
1300 }
644dd1da 1301 }
4485aa35 1302 }
// All paths that ended up with a final res/ret (policy, Lua or resolver) continue here
644dd1da 1303 haveAnswer:;
3e8216c8 1304 if(res == PolicyDecision::DROP) {
e9c2ad3a 1305 g_stats.policyDrops++;
ae7e77ad 1306 delete dc;
1307 dc=0;
1308 return;
3ddb9247 1309 }
// Dump the stored resolver trace, line by line, for traced queries and for failures
9cdfab64 1310 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1311 {
85ffbc53
PD
1312 string trace(sr.getTrace());
1313 if(!trace.empty()) {
1314 vector<string> lines;
1315 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1316 for(const string& line : lines) {
85ffbc53 1317 if(!line.empty())
e6a9dde5 1318 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1319 }
1320 }
1321 }
3ddb9247 1322
// res == -1 is answered as SERVFAIL without any records
9cdfab64 1323 if(res == -1) {
0fe1d080
PD
1324 pw.getHeader()->rcode=RCode::ServFail;
1325 // no commit here, because no record
1326 g_stats.servFails++;
1327 }
288f4aa9 1328 else {
ea634573 1329 pw.getHeader()->rcode=res;
92011b8f 1330
f3fe4ae6 1331 // Does the validation mode or query demand validation?
0c43f455 1332 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1333 try {
f3fe4ae6 1334 if(sr.doLog()) {
e6a9dde5 1335 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1336 }
4d2be65d
RG
1337
1338 auto state = sr.getValidationState();
1339
b25cae9a 1340 if(state == Secure) {
2e921ec6 1341 if(sr.doLog()) {
e6a9dde5 1342 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1343 }
b25cae9a 1344
1345 // Is the query source interested in the value of the ad-bit?
885c8881 1346 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1347 pw.getHeader()->ad=1;
1348 }
1349 else if(state == Insecure) {
f3fe4ae6 1350 if(sr.doLog()) {
e6a9dde5 1351 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1352 }
b25cae9a 1353
1354 pw.getHeader()->ad=0;
f3fe4ae6 1355 }
b25cae9a 1356 else if(state == Bogus) {
66f2e6ad
KM
1357 if(t_bogusremotes)
1358 t_bogusremotes->push_back(dc->d_source);
1359 if(t_bogusqueryring)
1360 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1361 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1362 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1363 }
1364
1365 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1366 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1367 if(sr.doLog()) {
e6a9dde5 1368 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1369 }
1370
1371 pw.getHeader()->rcode=RCode::ServFail;
1372 goto sendit;
1373 } else {
1374 if(sr.doLog()) {
e6a9dde5 1375 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1376 }
1377 }
1378 }
1379 }
1380 catch(ImmediateServFailException &e) {
1381 if(g_logCommonErrors)
e6a9dde5 1382 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1383 pw.getHeader()->rcode=RCode::ServFail;
1384 goto sendit;
f3fe4ae6 1385 }
b3f0ed10 1386 }
1387
// A sortlist matching the client makes the record order client dependent,
// so such an answer is marked variable (and will not be packet cached)
c154c8a4 1388 if(ret.size()) {
92476c8b 1389 orderAndShuffle(ret);
5cc8371b 1390 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1391 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1392 variableAnswer=true;
1393 }
8e079f3a 1394 }
0afa32d4
RG
1395
1396 bool needCommit = false;
// Pack the records until one does not fit. When the client did not set DNSSECOK, NSEC3
// records are always left out, and RRSIG/NSEC records are left out unless they were asked
// for (same qtype, or ANY) and sit in the ANSWER section.
8e079f3a 1397 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1398 if( ! DNSSECOK &&
1399 ( i->d_type == QType::NSEC3 ||
1400 (
1401 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1402 (
1403 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1404 i->d_place != DNSResourceRecord::ANSWER
1405 )
1406 )
1407 )
1408 ) {
2e921ec6 1409 continue;
3e80ebce
KM
1410 }
1411
70fb28d9 1412 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1413 needCommit = false;
1414 break;
1415 }
1416 needCommit = true;
1417
41c542ec
NC
1418#ifdef NOD_ENABLED
1419 bool udr = false;
1420 if (g_udrEnabled) {
1421 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
1422 }
1423#endif /* NOD ENABLED */
1424
aa7929a3 1425#ifdef HAVE_PROTOBUF
0bd2e252 1426 if(t_protobufServer) {
41c542ec
NC
1427#ifdef NOD_ENABLED
1428 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1429#else
0bd2e252 1430 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1431#endif /* NOD_ENABLED */
aa7929a3
RG
1432 }
1433#endif
ea634573 1434 }
0afa32d4 1435 if(needCommit)
8e079f3a 1436 pw.commit();
288f4aa9 1437 }
// Common exit for every answer that is actually sent, including the early error paths
10321a98 1438 sendit:;
b3f0ed10 1439
97c6d7e5
RG
1440 if (haveEDNS) {
1441 /* we try to add the EDNS OPT RR even for truncated answers,
1442 as rfc6891 states:
1443 "The minimal response MUST be the DNS header, question section, and an
1444 OPT record. This MUST also occur when a truncated response (using
1445 the DNS header's TC bit) is returned."
1446 */
9b60fb71 1447 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1448 pw.commit();
97c6d7e5
RG
1449 }
1450
79332bff 1451 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1452 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
aa7929a3 1453#ifdef HAVE_PROTOBUF
// Log the response over protobuf, unless taggedOnly is set and the answer has
// neither an applied policy name nor policy tags
845cbf4c 1454 if (t_protobufServer && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1455 pbMessage->setBytes(packet.size());
1456 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1457 if (appliedPolicy.d_name) {
d362f7c1
RG
1458 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1459 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1460 }
d362f7c1
RG
1461 pbMessage->setPolicyTags(dc->d_policyTags);
1462 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1463 pbMessage->setRequestorId(dq.requestorId);
1464 pbMessage->setDeviceId(dq.deviceId);
41c542ec
NC
1465#ifdef NOD_ENABLED
1466 if (g_nodEnabled) {
1467 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1468 pbMessage->setNOD(true);
1469 }
1470#endif /* NOD_ENABLED */
63341e8d 1471 protobufLogResponse(t_protobufServer, *pbMessage);
ac238ea7
NC
1472#ifdef NOD_ENABLED
// The NOD/UDR markers are reset after logging; pbMessage is handed to the packet cache below
1473 pbMessage->setNOD(false);
1474 pbMessage->clearUDR();
1475#endif /* NOD_ENABLED */
aa7929a3
RG
1476 }
1477#endif
// UDP: send the packet with sendmsg() and, unless the packet cache is disabled or the
// answer is variable, store it in the packet cache
ea634573 1478 if(!dc->d_tcp) {
b71b60ee 1479 struct msghdr msgh;
1480 struct iovec iov;
1481 char cbuf[256];
1482 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1483 msgh.msg_control=NULL;
1484
cbc03320 1485 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1486 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1487 }
cbc03320 1488 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1489 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1490
3762e821 1491 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
e9f63d47 1492 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1493 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1494 g_now.tv_sec,
76e2b9e3 1495 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1496 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1497 dq.validationState,
d362f7c1 1498 pbMessage);
1051f8a9 1499 }
3762e821 1500 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1501 }
// TCP: write a two byte length prefix (high byte first) followed by the packet
9c495589
BH
1502 else {
1503 char buf[2];
ea634573
BH
1504 buf[0]=packet.size()/256;
1505 buf[1]=packet.size()%256;
feccc9fc 1506
c038218b 1507 Utility::iovec iov[2];
feccc9fc 1508
ea634573
BH
1509 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1510 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1511
dd079764 1512 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1513 bool hadError=true;
feccc9fc 1514
dd079764 1515 if(wret == 0)
e6a9dde5 1516 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1517 else if(wret < 0 )
e6a9dde5 1518 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1519 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1520 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1521 else
18af64a8 1522 hadError=false;
3ddb9247 1523
09e6702a 1524 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1525
09e6702a 1526 if(hadError) {
18af64a8 1527 // no need to remove us from FDM, we weren't there
c36bc97a 1528 dc->d_socket = -1;
09e6702a 1529 }
a6ae6414 1530 else {
fde296a3
RG
1531 dc->d_tcpConnection->queriesCount++;
1532 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1533 dc->d_socket = -1;
1534 }
1535 else {
1536 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1537 Utility::gettimeofday(&g_now, 0); // needs to be updated
1538 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1539 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1540 }
0e9d9ce2 1541 }
9c495589 1542 }
// Time spent on this question (spent is used as seconds in the comparisons below),
// followed by logging and latency / cache hit statistics
2c9119cd 1543 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1544 if(!g_quiet) {
e6a9dde5
PL
1545 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1546 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1547 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1548 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1549
1550 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1551 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1552 }
1553
e6a9dde5 1554 g_log<<endl;
2c9119cd 1555
c75a6a9e 1556 }
b23b8614 1557
f7b8cffa
RG
1558 if (sr.d_outqueries || sr.d_authzonequeries) {
1559 t_RC->cacheMisses++;
1560 }
1561 else {
1562 t_RC->cacheHits++;
1563 }
2c9119cd 1564
fe213470
BH
1565 if(spent < 0.001)
1566 g_stats.answers0_1++;
1567 else if(spent < 0.010)
1568 g_stats.answers1_10++;
1569 else if(spent < 0.1)
1570 g_stats.answers10_100++;
1571 else if(spent < 1.0)
1572 g_stats.answers100_1000++;
1573 else
1574 g_stats.answersSlow++;
1575
574af7ea 1576 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1577 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1578 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1579 // no worries, we do this for packet cache hits elsewhere
19178da9 1580
// ourtime: total time minus the time accounted to the network (sr.d_totUsec)
1581 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1582 if(ourtime < 1)
1583 g_stats.ourtime0_1++;
1584 else if(ourtime < 2)
1585 g_stats.ourtime1_2++;
1586 else if(ourtime < 4)
1587 g_stats.ourtime2_4++;
1588 else if(ourtime < 8)
1589 g_stats.ourtime4_8++;
1590 else if(ourtime < 16)
1591 g_stats.ourtime8_16++;
1592 else if(ourtime < 32)
1593 g_stats.ourtime16_32++;
1594 else {
1595 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1596 g_stats.ourtimeSlow++;
1597 }
042da1a1 1598 if(ourtime >= 0.0) {
1599 newLat=ourtime*1000; // usec
1600 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1601 }
c6d04bdc 1602 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 1603 delete dc;
c36bc97a 1604 dc=0;
288f4aa9 1605 }
// Each handler below only logs the problem and frees dc; no answer is sent from here
3f81d239 1606 catch(PDNSException &ae) {
e6a9dde5 1607 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 1608 delete dc;
288f4aa9 1609 }
16ce7f18
JS
1610 catch(const MOADNSException &mde) {
1611 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
c36bc97a 1612 delete dc;
7b1469bb 1613 }
fdbf35ac 1614 catch(std::exception& e) {
e6a9dde5 1615 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1616
1617 // Luawrapper nests the exception from Lua, so we unnest it here
1618 try {
1619 std::rethrow_if_nested(e);
2010ac95 1620 } catch(const std::exception& ne) {
e6a9dde5 1621 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1622 } catch(...) {}
1623
e6a9dde5 1624 g_log<<endl;
c36bc97a 1625 delete dc;
c154c8a4 1626 }
288f4aa9 1627 catch(...) {
e6a9dde5 1628 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
cbb097d8 1629 delete dc;
288f4aa9 1630 }
3ddb9247 1631
ec6eacbc 1632 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1633}
1634
d187038c 1635static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1636{
2d733c0f 1637 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1638 if(processNum >= 0)
335da0ba 1639 sockname += "."+std::to_string(processNum);
677e2a46 1640 sockname+=".controlsocket";
41f7a068 1641 s_rcc.listen(sockname);
3ddb9247 1642
387de317
BH
1643 int sockowner = -1;
1644 int sockgroup = -1;
1645
1646 if (!::arg().isEmpty("socket-group"))
1647 sockgroup=::arg().asGid("socket-group");
1648 if (!::arg().isEmpty("socket-owner"))
1649 sockowner=::arg().asUid("socket-owner");
3ddb9247 1650
f838ad8d
BH
1651 if (sockgroup > -1 || sockowner > -1) {
1652 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1653 unixDie("Failed to chown control socket");
1654 }
1655 }
387de317
BH
1656
1657 // do mode change if socket-mode is given
1658 if(!::arg().isEmpty("socket-mode")) {
1659 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1660 if(chmod(sockname.c_str(), sockmode) < 0) {
1661 unixDie("Failed to chmod control socket");
1662 }
387de317 1663 }
1d5b3ce6
BH
1664}
1665
5cc8371b 1666static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1667 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1668 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1669{
59cb4a79 1670 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1671 const bool lookForECS = ednssubnet != nullptr;
1672 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1673 size_t questionLen = question.length();
1674 unsigned int consumed=0;
1675 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1676
1677 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1678 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1679 const uint16_t arcount = ntohs(dh->arcount);
1680
1681 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1682 if (question.at(pos) != 0) {
1683 /* not an OPT or a XPF, bye. */
1684 return;
1685 }
1686
1687 pos += 1;
1688 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1689 pos += sizeof(dnsrecordheader);
1690
1691 if (pos >= questionLen) {
1692 return;
1693 }
1694
02b47f43 1695 /* OPT root label (1) followed by type (2) */
5cc8371b 1696 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1697 if (!options) {
1698 char* ecsStart = nullptr;
1699 size_t ecsLen = 0;
5cc8371b
RG
1700 /* we need to pass the record len */
1701 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1702 if (res == 0 && ecsLen > 4) {
1703 EDNSSubnetOpts eso;
1704 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1705 *ednssubnet=eso;
5cc8371b 1706 foundECS = true;
00b8cadc
RG
1707 }
1708 }
1709 }
1710 else {
5cc8371b
RG
1711 /* we need to pass the record len */
1712 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1713 if (res == 0) {
1714 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1715 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1716 EDNSSubnetOpts eso;
29e6303a 1717 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1718 *ednssubnet=eso;
5cc8371b 1719 foundECS = true;
00b8cadc
RG
1720 }
1721 }
02b47f43
RG
1722 }
1723 }
1724 }
59cb4a79 1725 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1726 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1727 return;
1728 }
1729
1730 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1731 }
1732
1733 pos += ntohs(drh->d_clen);
02b47f43
RG
1734 }
1735}
1736
d187038c 1737static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1738{
cd989c87 1739 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1740
879b3f70 1741 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 1742 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1743 if(bytes==1)
667f7e60 1744 conn->state=TCPConnection::BYTE1;
3ddb9247 1745 if(bytes==2) {
a0aa4f64 1746 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1747 conn->data.resize(conn->qlen);
667f7e60
BH
1748 conn->bytesread=0;
1749 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1750 }
1751 if(!bytes || bytes < 0) {
bb4bdbaf 1752 t_fdm->removeReadFD(fd);
09e6702a
BH
1753 return;
1754 }
1755 }
667f7e60 1756 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 1757 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1758 if(bytes==1) {
667f7e60 1759 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1760 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1761 conn->data.resize(conn->qlen);
667f7e60 1762 conn->bytesread=0;
09e6702a
BH
1763 }
1764 if(!bytes || bytes < 0) {
1765 if(g_logCommonErrors)
e6a9dde5 1766 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1767 t_fdm->removeReadFD(fd);
09e6702a
BH
1768 return;
1769 }
1770 }
667f7e60 1771 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 1772 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
f9d67b41 1773 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1774 if(g_logCommonErrors) {
1775 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1776 }
bb4bdbaf 1777 t_fdm->removeReadFD(fd);
09e6702a
BH
1778 return;
1779 }
b841314c 1780 conn->bytesread+=(uint16_t)bytes;
667f7e60 1781 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1782 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1783
f26bf547 1784 DNSComboWriter* dc=nullptr;
09e6702a 1785 try {
2749c3fe 1786 dc=new DNSComboWriter(conn->data, g_now);
09e6702a 1787 }
16ce7f18 1788 catch(const MOADNSException &mde) {
3ddb9247 1789 g_stats.clientParseError++;
4957a608 1790 if(g_logCommonErrors)
e6a9dde5 1791 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1792 return;
09e6702a 1793 }
cd989c87
BH
1794 dc->d_tcpConnection = conn; // carry the torch
1795 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1796 dc->d_tcp=true;
5cc8371b
RG
1797 dc->setRemote(conn->d_remote);
1798 dc->setSource(conn->d_remote);
a6147cd2 1799 ComboAddress dest;
d38e2ba9 1800 dest.reset();
a6147cd2 1801 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1802 socklen_t len = dest.getSocklen();
1803 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1804 dc->setLocal(dest);
5cc8371b 1805 dc->setDestination(dest);
33dcceba
RG
1806 DNSName qname;
1807 uint16_t qtype=0;
1808 uint16_t qclass=0;
1809 bool needECS = false;
5cc8371b 1810 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1811 string requestorId;
590388d2 1812 string deviceId;
16bbc6e3 1813 bool logQuery = false;
aa7929a3 1814#ifdef HAVE_PROTOBUF
02b47f43 1815 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1816 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1817 needECS = true;
1818 }
16bbc6e3 1819 logQuery = t_protobufServer && luaconfsLocal->protobufExportConfig.logQueries;
33dcceba
RG
1820#endif
1821
70fb28d9 1822 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
1823
1824 try {
29e6303a 1825 EDNSOptionViewMap ednsOptions;
5cc8371b 1826 bool xpfFound = false;
b40562da 1827 dc->d_ecsParsed = true;
5cc8371b 1828 dc->d_ecsFound = false;
2749c3fe 1829 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
1830 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1831 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 1832
70fb28d9 1833 if(t_pdl) {
33dcceba 1834 try {
70fb28d9 1835 if (t_pdl->d_gettag_ffi) {
f1c7929a 1836 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
1837 }
1838 else if (t_pdl->d_gettag) {
1839 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1840 }
33dcceba 1841 }
70fb28d9 1842 catch(const std::exception& e) {
33dcceba 1843 if(g_logCommonErrors)
e6a9dde5 1844 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1845 }
1846 }
1847 }
70fb28d9 1848 catch(const std::exception& e)
33dcceba
RG
1849 {
1850 if(g_logCommonErrors)
e6a9dde5 1851 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1852 }
1853 }
f52177c3
RG
1854
1855 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1856
33dcceba 1857#ifdef HAVE_PROTOBUF
63341e8d 1858 if(t_protobufServer || t_outgoingProtobufServer) {
67e31ebe 1859 dc->d_requestorId = requestorId;
590388d2 1860 dc->d_deviceId = deviceId;
02b47f43 1861 dc->d_uuid = (*t_uuidGenerator)();
4898a348 1862 }
02b47f43 1863
63341e8d 1864 if(t_protobufServer) {
02b47f43 1865 try {
02b47f43 1866
845cbf4c 1867 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
63341e8d 1868 protobufLogQuery(t_protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 1869 }
02b47f43
RG
1870 }
1871 catch(std::exception& e) {
1872 if(g_logCommonErrors)
e6a9dde5 1873 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
1874 }
1875 }
aa7929a3 1876#endif
879b3f70 1877 if(dc->d_mdp.d_header.qr) {
048f5db6 1878 g_stats.ignoredCount++;
c0f9be19
RG
1879 if(g_logCommonErrors) {
1880 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1881 }
cf14c141 1882 delete dc;
4957a608 1883 return;
879b3f70 1884 }
3abcdab2 1885 if(dc->d_mdp.d_header.opcode) {
048f5db6 1886 g_stats.ignoredCount++;
c0f9be19
RG
1887 if(g_logCommonErrors) {
1888 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1889 }
1890 delete dc;
1891 return;
1892 }
1893 else if (dh->qdcount == 0) {
1894 g_stats.emptyQueriesCount++;
1895 if(g_logCommonErrors) {
1896 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
1897 }
cf14c141 1898 delete dc;
3abcdab2
PD
1899 return;
1900 }
09e6702a 1901 else {
4957a608
BH
1902 ++g_stats.qcounter;
1903 ++g_stats.tcpqcounter;
50a5ef72 1904 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1905 return;
09e6702a
BH
1906 }
1907 }
1908 }
1909}
1910
6dcd28c3 1911//! Handle new incoming TCP connection
d187038c 1912static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1913{
37d3f960 1914 ComboAddress addr;
09e6702a 1915 socklen_t addrlen=sizeof(addr);
a683e8bd 1916 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1917 if(newsock>=0) {
85c32340
BH
1918 if(MT->numProcesses() > g_maxMThreads) {
1919 g_stats.overCapacityDrops++;
a7b68ae7
RG
1920 try {
1921 closesocket(newsock);
1922 }
1923 catch(const PDNSException& e) {
e6a9dde5 1924 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 1925 }
85c32340
BH
1926 return;
1927 }
1928
92011b8f 1929 if(t_remotes)
1930 t_remotes->push_back(addr);
49a699c4 1931 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1932 if(!g_quiet)
e6a9dde5 1933 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1934
09e6702a 1935 g_stats.unauthorizedTCP++;
a7b68ae7
RG
1936 try {
1937 closesocket(newsock);
1938 }
1939 catch(const PDNSException& e) {
e6a9dde5 1940 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 1941 }
09e6702a
BH
1942 return;
1943 }
bd0289fc 1944 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1945 g_stats.tcpClientOverflow++;
a7b68ae7
RG
1946 try {
1947 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1948 }
1949 catch(const PDNSException& e) {
e6a9dde5 1950 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 1951 }
09e6702a
BH
1952 return;
1953 }
3ddb9247 1954
3897b9e1 1955 setNonBlocking(newsock);
f26bf547 1956 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 1957 tc->state=TCPConnection::BYTE0;
3ddb9247 1958
cd989c87 1959 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1960
0bff046b 1961 struct timeval now;
c038218b 1962 Utility::gettimeofday(&now, 0);
cd989c87 1963 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1964 }
1965}
3ddb9247 1966
d187038c 1967static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1968{
183eb877 1969 gettimeofday(&g_now, 0);
b71b60ee 1970 struct timeval diff = g_now - tv;
1971 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1972
22cf1fda 1973 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1974 g_stats.tooOldDrops++;
1975 return 0;
1976 }
1977
1bc3c142 1978 ++g_stats.qcounter;
d7f10541
BH
1979 if(fromaddr.sin4.sin_family==AF_INET6)
1980 g_stats.ipv6qcounter++;
1bc3c142
BH
1981
1982 string response;
93f0da94 1983 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 1984 unsigned int ctag=0;
f57486f1 1985 uint32_t qhash = 0;
12aff2e5 1986 bool needECS = false;
5cc8371b 1987 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 1988 std::vector<std::string> policyTags;
5fd2577f 1989 LuaContext::LuaObject data;
5cc8371b
RG
1990 ComboAddress source = fromaddr;
1991 ComboAddress destination = destaddr;
67e31ebe 1992 string requestorId;
590388d2 1993 string deviceId;
16bbc6e3 1994 bool logQuery = false;
12aff2e5 1995#ifdef HAVE_PROTOBUF
02b47f43 1996 boost::uuids::uuid uniqueId;
02b47f43 1997 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1998 if (checkProtobufExport(luaconfsLocal)) {
4898a348 1999 uniqueId = (*t_uuidGenerator)();
02b47f43 2000 needECS = true;
63341e8d 2001 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
02b47f43
RG
2002 uniqueId = (*t_uuidGenerator)();
2003 }
16bbc6e3 2004 logQuery = t_protobufServer && luaconfsLocal->protobufExportConfig.logQueries;
f1c7929a 2005 bool logResponse = t_protobufServer && luaconfsLocal->protobufExportConfig.logResponses;
12aff2e5 2006#endif
b40562da
RG
2007 EDNSSubnetOpts ednssubnet;
2008 bool ecsFound = false;
2009 bool ecsParsed = false;
70fb28d9
RG
2010 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2011 bool variable = false;
1bc3c142 2012 try {
02b47f43
RG
2013 DNSName qname;
2014 uint16_t qtype=0;
2015 uint16_t qclass=0;
1bc3c142 2016 uint32_t age;
c15ff3df 2017 bool qnameParsed=false;
8f7473d7 2018#ifdef MALLOC_TRACE
2019 /*
2020 static uint64_t last=0;
2021 if(!last)
2022 g_mtracer->clearAllocators();
2023 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2024 last=g_mtracer->getAllocs();
2025 cout<<g_mtracer->topAllocatorsString()<<endl;
2026 g_mtracer->clearAllocators();
2027 */
2028#endif
55a1378f 2029
70fb28d9 2030 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2031 try {
29e6303a 2032 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2033 bool xpfFound = false;
2034
2035 ecsFound = false;
2036
2037 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2038 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2039 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2040
c15ff3df
RG
2041 qnameParsed = true;
2042 ecsParsed = true;
12aff2e5 2043
70fb28d9 2044 if(t_pdl) {
12aff2e5 2045 try {
70fb28d9 2046 if (t_pdl->d_gettag_ffi) {
f1c7929a 2047 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
70fb28d9
RG
2048 }
2049 else if (t_pdl->d_gettag) {
2050 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2051 }
12aff2e5 2052 }
70fb28d9 2053 catch(const std::exception& e) {
12aff2e5 2054 if(g_logCommonErrors)
e6a9dde5 2055 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2056 }
8ea8c302 2057 }
b2eacd67 2058 }
70fb28d9 2059 catch(const std::exception& e)
b2eacd67 2060 {
2061 if(g_logCommonErrors)
e6a9dde5 2062 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2063 }
12ce523e 2064 }
3ddb9247 2065
02b47f43 2066 bool cacheHit = false;
1fbc6dc5 2067 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2068#ifdef HAVE_PROTOBUF
63341e8d 2069 if(t_protobufServer) {
d362f7c1 2070 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2071 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2072 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
63341e8d 2073 protobufLogQuery(t_protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
b790ef3d 2074 }
d9d3f9c1
RG
2075 }
2076#endif /* HAVE_PROTOBUF */
02b47f43 2077
70fb28d9
RG
2078 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2079 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2080 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2081 vState valState;
c15ff3df 2082 if (qnameParsed) {
8467ec26 2083 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2084 }
2085 else {
8467ec26 2086 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2087 }
2088
d9d3f9c1 2089 if (cacheHit) {
8467ec26
KM
2090 if(valState == Bogus) {
2091 if(t_bogusremotes)
2092 t_bogusremotes->push_back(source);
2093 if(t_bogusqueryring)
2094 t_bogusqueryring->push_back(make_pair(qname, qtype));
2095 }
2096
d9d3f9c1 2097#ifdef HAVE_PROTOBUF
845cbf4c 2098 if(t_protobufServer && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2099 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2100 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2101 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2102 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2103 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2104 pbMessage->setRequestorId(requestorId);
2105 pbMessage->setDeviceId(deviceId);
63341e8d 2106 protobufLogResponse(t_protobufServer, *pbMessage);
02b47f43 2107 }
d9d3f9c1 2108#endif /* HAVE_PROTOBUF */
49a3500d 2109 if(!g_quiet)
e6a9dde5 2110 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2111
1bc3c142
BH
2112 g_stats.packetCacheHits++;
2113 SyncRes::s_queries++;
2114 ageDNSPacket(response, age);
b71b60ee 2115 struct msghdr msgh;
2116 struct iovec iov;
2117 char cbuf[256];
2118 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2119 msgh.msg_control=NULL;
2120
cbc03320 2121 if(g_fromtosockets.count(fd)) {
fbe2a2e0 2122 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 2123 }
cbc03320 2124 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2125 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2126
97bee66d 2127 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2128 struct dnsheader tmpdh;
2129 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2130 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2131 }
08f3f638 2132 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2133 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2134 return 0;
2135 }
3ddb9247 2136 }
1bc3c142 2137 catch(std::exception& e) {
e6a9dde5 2138 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2139 return 0;
2140 }
3ddb9247 2141
f26bf547 2142 if(t_pdl) {
5cc8371b 2143 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2144 if(!g_quiet)
e6a9dde5 2145 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2146 g_stats.policyDrops++;
2147 return 0;
2148 }
2149 }
2150
1bc3c142 2151 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2152 if(!g_quiet)
e6a9dde5 2153 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2154
1bc3c142
BH
2155 g_stats.overCapacityDrops++;
2156 return 0;
2157 }
3ddb9247 2158
5164bac3 2159 DNSComboWriter* dc = new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data));
1bc3c142 2160 dc->setSocket(fd);
49a3500d 2161 dc->d_tag=ctag;
e9f63d47 2162 dc->d_qhash=qhash;
5cc8371b
RG
2163 dc->setRemote(fromaddr);
2164 dc->setSource(source);
b71b60ee 2165 dc->setLocal(destaddr);
5cc8371b 2166 dc->setDestination(destination);
1bc3c142 2167 dc->d_tcp=false;
b40562da
RG
2168 dc->d_ecsFound = ecsFound;
2169 dc->d_ecsParsed = ecsParsed;
2170 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2171 dc->d_ttlCap = ttlCap;
2172 dc->d_variable = variable;
aa7929a3 2173#ifdef HAVE_PROTOBUF
63341e8d 2174 if (t_protobufServer || t_outgoingProtobufServer) {
5164bac3 2175 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2176 }
67e31ebe 2177 dc->d_requestorId = requestorId;
590388d2 2178 dc->d_deviceId = deviceId;
aa7929a3
RG
2179#endif
2180
1bc3c142
BH
2181 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
2182 return 0;
3ddb9247
PD
2183}
2184
b71b60ee 2185
d187038c 2186static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2187{
a683e8bd 2188 ssize_t len;
12c2f2b9 2189 static const size_t maxIncomingQuerySize = 512;
04896b99 2190 static thread_local std::string data;
5db529f8 2191 ComboAddress fromaddr;
b71b60ee 2192 struct msghdr msgh;
2193 struct iovec iov;
2194 char cbuf[256];
390f1dab 2195 bool firstQuery = true;
b71b60ee 2196
c0a00acd
RG
2197 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2198 data.resize(maxIncomingQuerySize);
2199 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2200 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2201
c0a00acd 2202 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2203
c0a00acd 2204 firstQuery = false;
390f1dab 2205
c0a00acd
RG
2206 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2207 g_stats.ignoredCount++;
2208 if (!g_quiet) {
2209 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2210 }
2211 return;
04896b99 2212 }
04896b99 2213
c0a00acd
RG
2214 if (msgh.msg_flags & MSG_TRUNC) {
2215 g_stats.truncatedDrops++;
2216 if (!g_quiet) {
2217 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2218 }
2219 return;
ba892c7f 2220 }
b23b8614 2221
c0a00acd
RG
2222 if(t_remotes) {
2223 t_remotes->push_back(fromaddr);
2224 }
81859ba5 2225
c0a00acd
RG
2226 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2227 if(!g_quiet) {
2228 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2229 }
3ddb9247 2230
c0a00acd
RG
2231 g_stats.unauthorizedUDP++;
2232 return;
5db529f8 2233 }
c0a00acd
RG
2234 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2235 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2236 if(!g_quiet) {
2237 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2238 }
2239
2240 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2241 return;
3abcdab2 2242 }
c0a00acd
RG
2243
2244 try {
2245 data.resize(static_cast<size_t>(len));
2246 dnsheader* dh=(dnsheader*)&data[0];
2247
2248 if(dh->qr) {
2249 g_stats.ignoredCount++;
2250 if(g_logCommonErrors) {
2251 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2252 }
2253 }
2254 else if(dh->opcode) {
2255 g_stats.ignoredCount++;
2256 if(g_logCommonErrors) {
2257 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2258 }
a6147cd2 2259 }
c0f9be19
RG
2260 else if (dh->qdcount == 0) {
2261 g_stats.emptyQueriesCount++;
2262 if(g_logCommonErrors) {
2263 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2264 }
2265 }
a6147cd2 2266 else {
c0a00acd
RG
2267 struct timeval tv={0,0};
2268 HarvestTimestamp(&msgh, &tv);
2269 ComboAddress dest;
2270 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2271 auto loc = rplookup(g_listenSocketsAddresses, fd);
2272 if(HarvestDestinationAddress(&msgh, &dest)) {
2273 // but.. need to get port too
2274 if(loc) {
2275 dest.sin4.sin_port = loc->sin4.sin_port;
2276 }
a6147cd2 2277 }
2278 else {
c0a00acd
RG
2279 if(loc) {
2280 dest = *loc;
2281 }
2282 else {
2283 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2284 socklen_t slen = dest.getSocklen();
2285 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2286 }
2287 }
2288
2289 if(g_weDistributeQueries) {
2290 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2291 }
2292 else {
2293 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2294 }
2295 }
c0a00acd 2296 }
16ce7f18 2297 catch(const MOADNSException &mde) {
c0a00acd
RG
2298 g_stats.clientParseError++;
2299 if(g_logCommonErrors) {
2300 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2301 }
2302 }
2303 catch(const std::runtime_error& e) {
2304 g_stats.clientParseError++;
2305 if(g_logCommonErrors) {
2306 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2307 }
5db529f8
BH
2308 }
2309 }
c0a00acd
RG
2310 else {
2311 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2312 if(firstQuery && errno == EAGAIN) {
2313 g_stats.noPacketError++;
2314 }
390f1dab 2315
c0a00acd
RG
2316 break;
2317 }
ac0e821b 2318 }
5db529f8
BH
2319}
2320
adb6cd72 2321static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2322{
37d3f960 2323 int fd;
f28307ad 2324 vector<string>locals;
2e3d8a19 2325 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2326
f28307ad 2327 if(locals.empty())
3f81d239 2328 throw PDNSException("No local address specified");
3ddb9247 2329
f28307ad 2330 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2331 ServiceTuple st;
2332 st.port=::arg().asNum("local-port");
2333 parseService(*i, st);
3ddb9247 2334
32252594
BH
2335 ComboAddress sin;
2336
d38e2ba9 2337 sin.reset();
37d3f960 2338 sin.sin4.sin_family = AF_INET;
32252594 2339 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2340 sin.sin6.sin6_family = AF_INET6;
f71bc087 2341 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2342 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2343 }
2344
2345 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2346 if(fd<0)
3f81d239 2347 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2348
3897b9e1 2349 setCloseOnExec(fd);
a903b39c 2350
f28307ad 2351 int tmp=1;
810ff705 2352 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2353 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2354 exit(1);
f28307ad 2355 }
0dfa94ab 2356 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2357 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2358 }
2359
c8ddb7c2 2360#ifdef TCP_DEFER_ACCEPT
38ac0821 2361 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2362 if(i==locals.begin())
e6a9dde5 2363 g_log<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2364 }
2365#endif
2366
fec7dd5a
SS
2367 if( ::arg().mustDo("non-local-bind") )
2368 Utility::setBindAny(AF_INET, fd);
2369
2332f42d 2370#ifdef SO_REUSEPORT
810ff705
RG
2371 if(g_reusePort) {
2372 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2373 throw PDNSException("SO_REUSEPORT: "+stringerror());
2374 }
2375#endif
2376
0735b17e
RG
2377 if (::arg().asNum("tcp-fast-open") > 0) {
2378#ifdef TCP_FASTOPEN
2379 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2380 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2381 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2382 }
2383#else
e6a9dde5 2384 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2385#endif
2386 }
2387
32252594 2388 sin.sin4.sin_port = htons(st.port);
a683e8bd 2389 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2390 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2391 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2392
3897b9e1 2393 setNonBlocking(fd);
49a699c4 2394 setSocketSendBuffer(fd, 65000);
37d3f960 2395 listen(fd, 128);
b243ca3b 2396 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2397 tcpSockets.insert(fd);
2398
84433b79 2399 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2400 // - fd is not that which we know here, but returned from accept()
3ddb9247 2401 if(sin.sin4.sin_family == AF_INET)
e6a9dde5 2402 g_log<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2403 else
e6a9dde5 2404 g_log<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2405 }
9c495589
BH
2406}
2407
b243ca3b 2408static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2409{
fec7dd5a 2410 int one=1;
f28307ad 2411 vector<string>locals;
2e3d8a19 2412 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2413
f28307ad 2414 if(locals.empty())
3f81d239 2415 throw PDNSException("No local address specified");
3ddb9247 2416
f28307ad 2417 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2418 ServiceTuple st;
2419 st.port=::arg().asNum("local-port");
2420 parseService(*i, st);
2421
37d3f960 2422 ComboAddress sin;
996c89cc 2423
d38e2ba9 2424 sin.reset();
37d3f960 2425 sin.sin4.sin_family = AF_INET;
32252594 2426 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2427 sin.sin6.sin6_family = AF_INET6;
f71bc087 2428 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2429 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2430 }
3ddb9247 2431
bb4bdbaf 2432 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2433 if(fd < 0) {
3f81d239 2434 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2435 }
915b0c39 2436 if (!setSocketTimestamps(fd))
e6a9dde5 2437 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2438
b71b60ee 2439 if(IsAnyAddress(sin)) {
cbc03320 2440 if(sin.sin4.sin_family == AF_INET)
2441 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2442 g_fromtosockets.insert(fd);
757d3179 2443#ifdef IPV6_RECVPKTINFO
cbc03320 2444 if(sin.sin4.sin_family == AF_INET6)
2445 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2446 g_fromtosockets.insert(fd);
757d3179 2447#endif
0dfa94ab 2448 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2449 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2450 }
b71b60ee 2451 }
fec7dd5a
SS
2452 if( ::arg().mustDo("non-local-bind") )
2453 Utility::setBindAny(AF_INET6, fd);
2454
3897b9e1 2455 setCloseOnExec(fd);
a903b39c 2456
4e9a20e6 2457 setSocketReceiveBuffer(fd, 250000);
32252594 2458 sin.sin4.sin_port = htons(st.port);
37d3f960 2459
2332f42d 2460
2573d4a6 2461#ifdef SO_REUSEPORT
810ff705 2462 if(g_reusePort) {
2332f42d 2463 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2464 throw PDNSException("SO_REUSEPORT: "+stringerror());
2465 }
2466#endif
a683e8bd 2467 socklen_t socklen=sin.getSocklen();
3ddb9247 2468 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2469 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2470
3897b9e1 2471 setNonBlocking(fd);
c2136bf0 2472
b243ca3b 2473 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2474 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2475 if(sin.sin4.sin_family == AF_INET)
e6a9dde5 2476 g_log<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2477 else
e6a9dde5 2478 g_log<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2479 }
c836dc19 2480}
caa6eefa 2481
d187038c 2482static void daemonize(void)
c836dc19
BH
2483{
2484 if(fork())
2485 exit(0); // bye bye
3ddb9247
PD
2486
2487 setsid();
c836dc19 2488
27a5ead5 2489 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2490 if(i < 0)
e6a9dde5 2491 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2492 else {
2493 dup2(i,0); /* stdin */
2494 dup2(i,1); /* stderr */
2495 dup2(i,2); /* stderr */
2496 close(i);
2497 }
288f4aa9 2498}
caa6eefa 2499
d187038c 2500static void usr1Handler(int)
c75a6a9e
BH
2501{
2502 statsWanted=true;
2503}
ae1b2e98 2504
d187038c 2505static void usr2Handler(int)
9170fbaf 2506{
f1f34cc2 2507 g_quiet= !g_quiet;
2508 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2509 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2510}
2511
d187038c 2512static void doStats(void)
c75a6a9e 2513{
16beeaa4
BH
2514 static time_t lastOutputTime;
2515 static uint64_t lastQueryCount;
d299d4f5 2516
2517 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2518 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2519
d299d4f5 2520 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2521 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2522 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2523 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2524 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2525
e6a9dde5 2526 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2527 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2528 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2529 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2530 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2531 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2532 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2533 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2534
e6a9dde5 2535 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2536 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2537
e6a9dde5 2538 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2539 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2540
16beeaa4
BH
2541 time_t now = time(0);
2542 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2543 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2544 }
2545 lastOutputTime = now;
2546 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2547 }
3ddb9247 2548 else if(statsWanted)
e6a9dde5 2549 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2550
c75a6a9e
BH
2551 statsWanted=false;
2552}
c836dc19 2553
29f0b1ce 2554static void houseKeeping(void *)
c836dc19 2555{
cb1523d1 2556 static thread_local time_t last_rootupdate, last_prune, last_secpoll;
3337c2f7
RG
2557 static thread_local int cleanCounter=0;
2558 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
cc59bce6 2559 try {
2560 if(s_running)
2561 return;
2562 s_running=true;
3ddb9247 2563
cc59bce6 2564 struct timeval now;
2565 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2566
2567 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 2568 DTime dt;
2569 dt.setTimeval(now);
a6f7f5fe 2570 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2571 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2572
a6f7f5fe 2573 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2574
cc59bce6 2575 if(!((cleanCounter++)%40)) { // this is a full scan!
2576 time_t limit=now.tv_sec-300;
a712cb56 2577 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2578 }
2579 last_prune=time(0);
d67620e4 2580 }
3ddb9247 2581
cc59bce6 2582 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2583 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2584 if (!res)
2585 last_rootupdate=now.tv_sec;
cc59bce6 2586 }
3ddb9247 2587
b243ca3b 2588 if(isHandlerThread()) {
3ddb9247 2589
cc59bce6 2590 if(now.tv_sec - last_secpoll >= 3600) {
2591 try {
2592 doSecPoll(&last_secpoll);
2593 }
581d4ea3 2594 catch(std::exception& e)
2595 {
e6a9dde5 2596 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2597 }
47e9b74f 2598 catch(PDNSException& e)
2599 {
e6a9dde5 2600 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2601 }
d0992a65
CH
2602 catch(ImmediateServFailException &e)
2603 {
e6a9dde5 2604 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2605 }
47e9b74f 2606 catch(...)
2607 {
e6a9dde5 2608 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2609 }
18b73338 2610 }
d67620e4 2611 }
cc59bce6 2612 s_running=false;
d67620e4 2613 }
cc59bce6 2614 catch(PDNSException& ae)
2615 {
2616 s_running=false;
e6a9dde5 2617 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2618 throw;
2619 }
779828c4 2620}
d6d5dea7 2621
d187038c 2622static void makeThreadPipes()
49a699c4 2623{
b243ca3b
RG
2624 /* thread 0 is the handler / SNMP, we start at 1 */
2625 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2626 auto& threadInfos = s_threadInfos.at(n);
2627
49a699c4
BH
2628 int fd[2];
2629 if(pipe(fd) < 0)
2630 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2631
b243ca3b
RG
2632 threadInfos.pipes.readToThread = fd[0];
2633 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2634
49a699c4
BH
2635 if(pipe(fd) < 0)
2636 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2637
2638 threadInfos.pipes.readFromThread = fd[0];
2639 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2640
cf8cda18
RG
2641 if(pipe(fd) < 0)
2642 unixDie("Creating pipe for inter-thread communications");
d10307c5 2643
b243ca3b
RG
2644 threadInfos.pipes.readQueriesToThread = fd[0];
2645 threadInfos.pipes.writeQueriesToThread = fd[1];
2646
2647 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2648 unixDie("Making pipe for inter-thread communications non-blocking");
2649 }
49a699c4
BH
2650 }
2651}
2652
00c9b8c1
BH
2653struct ThreadMSG
2654{
2655 pipefunc_t func;
2656 bool wantAnswer;
2657};
2658
b4e76a18 2659void broadcastFunction(const pipefunc_t& func)
49a699c4 2660{
b243ca3b
RG
2661 /* This function might be called by the worker with t_id 0 during startup
2662 for the initialization of ACLs and domain maps. After that it should only
2663 be called by the handler. */
d77abca1 2664
b243ca3b
RG
2665 if (s_threadInfos.empty() && isHandlerThread()) {
2666 /* the handler and distributors will call themselves below, but
2667 during startup we get called while s_threadInfos has not been
2668 populated yet to update the ACL or domain maps, so we need to
2669 handle that case.
2670 */
2671 func();
2672 }
b4e76a18 2673
b243ca3b
RG
2674 unsigned int n = 0;
2675 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2676 if(n++ == t_id) {
b4e76a18 2677 func(); // don't write to ourselves!
49a699c4
BH
2678 continue;
2679 }
3ddb9247 2680
00c9b8c1
BH
2681 ThreadMSG* tmsg = new ThreadMSG();
2682 tmsg->func = func;
2683 tmsg->wantAnswer = true;
b243ca3b 2684 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2685 delete tmsg;
b243ca3b 2686
49a699c4 2687 unixDie("write to thread pipe returned wrong size or error");
b841314c 2688 }
3ddb9247 2689
49467864 2690 string* resp = nullptr;
b243ca3b 2691 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2692 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2693
49a699c4 2694 if(resp) {
49a699c4 2695 delete resp;
49467864 2696 resp = nullptr;
49a699c4
BH
2697 }
2698 }
2699}
06ea9015 2700
b243ca3b 2701// This function is only called by the distributor threads, when pdns-distributes-queries is set
8171ab83 2702void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 2703{
b243ca3b 2704 if (!isDistributorThread()) {
d77abca1
RG
2705 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2706 exit(1);
2707 }
2708
8171ab83 2709 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
b243ca3b 2710 unsigned int target = /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
06ea9015 2711
b243ca3b
RG
2712 const auto& targetInfo = s_threadInfos[target];
2713 if(!targetInfo.isWorker) {
2714 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2715 exit(1);
00c9b8c1 2716 }
d77abca1 2717
b243ca3b 2718 const auto& tps = targetInfo.pipes;
00c9b8c1
BH
2719 ThreadMSG* tmsg = new ThreadMSG();
2720 tmsg->func = func;
2721 tmsg->wantAnswer = false;
3ddb9247 2722
cf8cda18
RG
2723 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2724 if (written > 0) {
2725 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2726 delete tmsg;
2727 unixDie("write to thread pipe returned wrong size or error");
2728 }
2729 }
2730 else {
2731 int error = errno;
b841314c 2732 delete tmsg;
cf8cda18
RG
2733 if (error == EAGAIN || error == EWOULDBLOCK) {
2734 g_stats.queryPipeFullDrops++;
2735 } else {
17634427 2736 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2737 }
b841314c 2738 }
00c9b8c1 2739}
3427fa8a 2740
d187038c 2741static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2742{
f26bf547 2743 ThreadMSG* tmsg = nullptr;
3ddb9247 2744
cf8cda18 2745 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
2746 unixDie("read from thread pipe returned wrong size or error");
2747 }
3ddb9247 2748
2f22827a 2749 void *resp=0;
2750 try {
2751 resp = tmsg->func();
2752 }
2753 catch(std::exception& e) {
6d2010a8 2754 if(g_logCommonErrors)
e6a9dde5 2755 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2756 }
2757 catch(PDNSException& e) {
6d2010a8 2758 if(g_logCommonErrors)
e6a9dde5 2759 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2760 }
d7c676a5 2761 if(tmsg->wantAnswer) {
b243ca3b
RG
2762 const auto& threadInfo = s_threadInfos.at(t_id);
2763 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 2764 delete tmsg;
00c9b8c1 2765 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
2766 }
2767 }
3ddb9247 2768
00c9b8c1 2769 delete tmsg;
49a699c4 2770}
09e6702a 2771
13034931
BH
2772template<class T> void *voider(const boost::function<T*()>& func)
2773{
2774 return func();
2775}
2776
b3b5459d
BH
2777vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2778{
2779 a.insert(a.end(), b.begin(), b.end());
2780 return a;
2781}
2782
// Appends every (string, number) pair of b to the end of a and returns a.
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >& a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2788
3ddb9247
PD
2789vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2790{
2791 a.insert(a.end(), b.begin(), b.end());
2792 return a;
2793}
2794
92011b8f 2795
387b9ca6
RG
2796/*
2797 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
2798 reload the Lua script (not the Lua config) or change the current trace regex,
2799 and by the SNMP thread to gather metrics. */
b4e76a18 2800template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 2801{
b243ca3b 2802 if (!isHandlerThread()) {
788eeb4c 2803 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 2804 exit(1);
d77abca1
RG
2805 }
2806
b243ca3b 2807 unsigned int n = 0;
3427fa8a 2808 T ret=T();
b243ca3b
RG
2809 for (const auto& threadInfo : s_threadInfos) {
2810 if (n++ == t_id) {
2811 continue;
2812 }
2813
2814 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
2815 ThreadMSG* tmsg = new ThreadMSG();
2816 tmsg->func = boost::bind(voider<T>, func);
2817 tmsg->wantAnswer = true;
3ddb9247 2818
b841314c
RG
2819 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2820 delete tmsg;
3427fa8a 2821 unixDie("write to thread pipe returned wrong size or error");
b841314c 2822 }
3ddb9247 2823
49467864 2824 T* resp = nullptr;
3427fa8a
BH
2825 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2826 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2827
3427fa8a 2828 if(resp) {
3427fa8a
BH
2829 ret += *resp;
2830 delete resp;
49467864 2831 resp = nullptr;
3427fa8a
BH
2832 }
2833 }
2834 return ret;
2835}
2836
b4e76a18
RG
2837template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
2838template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
2839template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
2840template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3427fa8a 2841
d187038c 2842static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2843{
2844 string remote;
2845 string msg=s_rcc.recv(&remote);
2846 RecursorControlParser rcp;
2847 RecursorControlParser::func_t* command;
3ddb9247 2848
09e6702a 2849 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2850
2851 // If we are inside a chroot, we need to strip
2852 if (!arg()["chroot"].empty()) {
a683e8bd 2853 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2854 remote = remote.substr(len);
2855 }
2856
ab5c053d
BH
2857 try {
2858 s_rcc.send(answer, &remote);
2859 command();
2860 }
fdbf35ac 2861 catch(std::exception& e) {
e6a9dde5 2862 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 2863 }
3f81d239 2864 catch(PDNSException& ae) {
e6a9dde5 2865 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 2866 }
09e6702a
BH
2867}
2868
d187038c 2869static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2870{
0b18b22e 2871 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2872 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2873
667f7e60 2874 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2875
a683e8bd 2876 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2877 if(ret > 0) {
667f7e60 2878 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2879 pident->inNeeded-=(size_t)ret;
825fa717 2880 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2881 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2882 PacketID pid=*pident;
2883 string msg=pident->inMSG;
3ddb9247 2884
bb4bdbaf 2885 t_fdm->removeReadFD(fd);
3ddb9247 2886 MT->sendEvent(pid, &msg);
09e6702a
BH
2887 }
2888 else {
667f7e60 2889 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2890 }
2891 }
2892 else {
667f7e60 2893 PacketID tmp=*pident;
bb4bdbaf 2894 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2895 string empty;
2896 MT->sendEvent(tmp, &empty); // this conveys error status
2897 }
2898}
2899
d187038c 2900static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2901{
0b18b22e 2902 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 2903 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 2904 if(ret > 0) {
a683e8bd 2905 pid->outPos+=(ssize_t)ret;
667f7e60
BH
2906 if(pid->outPos==pid->outMSG.size()) {
2907 PacketID tmp=*pid;
bb4bdbaf 2908 t_fdm->removeWriteFD(fd);
09e6702a
BH
2909 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2910 }
2911 }
2912 else { // error or EOF
667f7e60 2913 PacketID tmp(*pid);
bb4bdbaf 2914 t_fdm->removeWriteFD(fd);
09e6702a 2915 string sent;
998a4334 2916 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
2917 }
2918}
2919
34801ab1 2920// resend event to everybody chained onto it
d187038c 2921static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
2922{
2923 if(iter->key.chain.empty())
2924 return;
e27e91a8 2925 // cerr<<"doResends called!\n";
34801ab1
BH
2926 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2927 resend.fd=-1;
2928 resend.id=*i;
e27e91a8 2929 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 2930
34801ab1
BH
2931 MT->sendEvent(resend, &content);
2932 g_stats.chainResends++;
34801ab1
BH
2933 }
2934}
2935
d187038c 2936static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2937{
600fc20b 2938 PacketID pid=any_cast<PacketID>(var);
a683e8bd 2939 ssize_t len;
fae8fe07
RG
2940 std::string packet;
2941 packet.resize(g_outgoingEDNSBufsize);
996c89cc 2942 ComboAddress fromaddr;
09e6702a
BH
2943 socklen_t addrlen=sizeof(fromaddr);
2944
fae8fe07 2945 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 2946
a683e8bd 2947 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 2948 if(len < 0)
996c89cc 2949 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 2950 else {
3ddb9247 2951 g_stats.serverParseError++;
09e6702a 2952 if(g_logCommonErrors)
e6a9dde5 2953 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 2954 ": packet smaller than DNS header"<<endl;
998a4334 2955 }
34801ab1 2956
49a699c4 2957 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
2958 string empty;
2959
2960 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 2961 if(iter != MT->d_waiters.end())
34801ab1 2962 doResends(iter, pid, empty);
3ddb9247 2963
34801ab1 2964 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 2965 return;
3ddb9247 2966 }
998a4334 2967
fae8fe07 2968 packet.resize(len);
998a4334 2969 dnsheader dh;
fae8fe07 2970 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 2971
6da3b3ad
PD
2972 PacketID pident;
2973 pident.remote=fromaddr;
2974 pident.id=dh.id;
2975 pident.fd=fd;
34801ab1 2976
33a928af 2977 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 2978 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
2979 }
2980
2981 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2982 !dh.qr) { // one weird server
2983 pident.domain.clear();
2984 pident.type = 0;
2985 }
2986 else {
2987 try {
0b31e67e 2988 if(len > 12)
fae8fe07 2989 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
2990 }
2991 catch(std::exception& e) {
2992 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 2993 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 2994 return;
34801ab1 2995 }
6da3b3ad 2996 }
34801ab1 2997
6da3b3ad
PD
2998 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2999 if(iter != MT->d_waiters.end()) {
3000 doResends(iter, pident, packet);
3001 }
c1da7976 3002
6da3b3ad 3003retryWithName:
4957a608 3004
6da3b3ad
PD
3005 if(!MT->sendEvent(pident, &packet)) {
3006 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3007 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3008 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3009 pident.domain == mthread->key.domain) {
6da3b3ad 3010 mthread->key.nearMisses++;
998a4334 3011 }
6da3b3ad
PD
3012
3013 // be a bit paranoid here since we're weakening our matching
3ddb9247 3014 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3015 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3016 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3017 pident.domain = mthread->key.domain;
3018 pident.type = mthread->key.type;
3019 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3020 }
09e6702a 3021 }
6da3b3ad
PD
3022 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3023 if(g_logCommonErrors) {
e6a9dde5 3024 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3025 }
09e6702a 3026 }
6da3b3ad
PD
3027 else if(fd >= 0) {
3028 t_udpclientsocks->returnSocket(fd);
3029 }
09e6702a
BH
3030}
3031
1f4abb20
BH
3032FDMultiplexer* getMultiplexer()
3033{
3034 FDMultiplexer* ret;
f26bf547 3035 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3036 try {
f26bf547 3037 ret=i.second();
1f4abb20
BH
3038 return ret;
3039 }
98d0ee4a 3040 catch(FDMultiplexerException &fe) {
e6a9dde5 3041 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3042 }
3043 catch(...) {
e6a9dde5 3044 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3045 }
1f4abb20 3046 }
e6a9dde5 3047 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3048 exit(1);
3049}
3050
3ddb9247 3051
d187038c 3052static string* doReloadLuaScript()
4485aa35 3053{
674cf0f6 3054 string fname= ::arg()["lua-dns-script"];
4485aa35 3055 try {
674cf0f6 3056 if(fname.empty()) {
f26bf547 3057 t_pdl.reset();
e6a9dde5 3058 g_log<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3059 return new string("unloaded\n");
4485aa35
BH
3060 }
3061 else {
9694e14f
AT
3062 t_pdl = std::make_shared<RecursorLua4>();
3063 t_pdl->loadFile(fname);
4485aa35
BH
3064 }
3065 }
fdbf35ac 3066 catch(std::exception& e) {
e6a9dde5 3067 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3068 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3069 }
3ddb9247 3070
e6a9dde5 3071 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3072 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3073}
3074
49a699c4
BH
3075string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3076{
3ddb9247 3077 if(begin != end)
49a699c4 3078 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3079
0f39c1a3 3080 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3081}
49a699c4 3082
d187038c 3083static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3084try
3085{
3086 if(newRegex.empty()) {
f26bf547 3087 t_traceRegex.reset();
77499b05
BH
3088 return new string("unset\n");
3089 }
3090 else {
f26bf547 3091 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3092 return new string("ok\n");
3093 }
3094}
3f81d239 3095catch(PDNSException& ae)
77499b05
BH
3096{
3097 return new string(ae.reason+"\n");
3098}
3099
3100string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3101{
3102 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3103}
3104
4e9a20e6 3105static void checkLinuxIPv6Limits()
3106{
3107#ifdef __linux__
3108 string line;
3109 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3110 int lim=std::stoi(line);
4e9a20e6 3111 if(lim < 16384) {
e6a9dde5 3112 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3113 }
3114 }
3115#endif
3116}
36849ff2 3117static void checkOrFixFDS()
4e9a20e6 3118{
c0063e60 3119 unsigned int availFDs=getFilenumLimit();
3120 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3121
3122 if(wantFDs > availFDs) {
067ad20e 3123 unsigned int hardlimit= getFilenumLimit(true);
3124 if(hardlimit >= wantFDs) {
c0063e60 3125 setFilenumLimit(wantFDs);
e6a9dde5 3126 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3127 }
3128 else {
067ad20e 3129 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3130 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3131 g_maxMThreads = newval;
067ad20e 3132 setFilenumLimit(hardlimit);
36849ff2 3133 }
3134 }
4e9a20e6 3135}
77499b05 3136
c390b2da 3137static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3138
f26bf547 3139static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3140{
3141 t_allowFrom = ng;
f26bf547 3142 return nullptr;
49a699c4
BH
3143}
3144
dbd23fc2
BH
3145int g_argc;
3146char** g_argv;
3147
18af64a8 3148void parseACLs()
f7c1d4e3 3149{
18af64a8 3150 static bool l_initialized;
3ddb9247 3151
49a699c4 3152 if(l_initialized) { // only reload configuration file on second call
18af64a8 3153 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3154 if(::arg()["config-name"]!="") {
3155 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3156 }
18af64a8 3157 cleanSlashes(configname);
3ddb9247
PD
3158
3159 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3160 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3161 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3162 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3163 ::arg().preParse(g_argc, g_argv, "include-dir");
3164
3165 // then process includes
3166 std::vector<std::string> extraConfigs;
242b90e1
AT
3167 ::arg().gatherIncludes(extraConfigs);
3168
1dc8f4d0 3169 for(const std::string& fn : extraConfigs) {
7e818521 3170 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3171 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3172 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3173 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3174 }
ca2c884c
AT
3175
3176 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3177 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3178 }
49a699c4 3179
f26bf547
RG
3180 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3181 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3182
2c95fc65
BH
3183 if(!::arg()["allow-from-file"].empty()) {
3184 string line;
2c95fc65
BH
3185 ifstream ifs(::arg()["allow-from-file"].c_str());
3186 if(!ifs) {
9c61b9d0 3187 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3188 }
3189
3190 string::size_type pos;
3191 while(getline(ifs,line)) {
3192 pos=line.find('#');
3193 if(pos!=string::npos)
3194 line.resize(pos);
3195 trim(line);
3196 if(line.empty())
3197 continue;
3198
18af64a8 3199 allowFrom->addMask(line);
2c95fc65 3200 }
e6a9dde5 3201 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3202 }
3203 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3204 vector<string> ips;
3205 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3206
e6a9dde5 3207 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3208 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3209 allowFrom->addMask(*i);
f7c1d4e3 3210 if(i!=ips.begin())
e6a9dde5
PL
3211 g_log<<Logger::Warning<<", ";
3212 g_log<<Logger::Warning<<*i;
f7c1d4e3 3213 }
e6a9dde5 3214 g_log<<Logger::Warning<<endl;
f7c1d4e3 3215 }
49a699c4 3216 else {
3ddb9247 3217 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
e6a9dde5 3218 g_log<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3219 allowFrom = nullptr;
49a699c4 3220 }
3ddb9247 3221
49a699c4 3222 g_initialAllowFrom = allowFrom;
d7dae798 3223 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3224 oldAllowFrom = nullptr;
3ddb9247 3225
49a699c4 3226 l_initialized = true;
18af64a8
BH
3227}
3228
795215f2 3229
756e82cf 3230static void setupDelegationOnly()
3231{
3232 vector<string> parts;
3233 stringtok(parts, ::arg()["delegation-only"], ", \t");
3234 for(const auto& p : parts) {
9065eb05 3235 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3236 }
3237}
795215f2 3238
8fd25133
RG
3239static std::map<unsigned int, std::set<int> > parseCPUMap()
3240{
3241 std::map<unsigned int, std::set<int> > result;
3242
3243 const std::string value = ::arg()["cpu-map"];
3244
3245 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3246 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3247 return result;
3248 }
3249
3250 std::vector<std::string> parts;
3251
3252 stringtok(parts, value, " \t");
3253
3254 for(const auto& part : parts) {
3255 if (part.find('=') == string::npos)
3256 continue;
3257
3258 try {
3259 auto headers = splitField(part, '=');
3260 trim(headers.first);
3261 trim(headers.second);
3262
3263 unsigned int threadId = pdns_stou(headers.first);
3264 std::vector<std::string> cpus;
3265
3266 stringtok(cpus, headers.second, ",");
3267
3268 for(const auto& cpu : cpus) {
3269 int cpuId = std::stoi(cpu);
3270
3271 result[threadId].insert(cpuId);
3272 }
3273 }
3274 catch(const std::exception& e) {
e6a9dde5 3275 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3276 }
3277 }
3278
3279 return result;
3280}
3281
3282static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3283{
3284 const auto& cpuMapping = cpusMap.find(n);
3285 if (cpuMapping != cpusMap.cend()) {
3286 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3287 if (rc == 0) {
e6a9dde5 3288 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3289 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3290 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3291 }
e6a9dde5 3292 g_log<<Logger::Info<<endl;
8fd25133
RG
3293 }
3294 else {
e6a9dde5 3295 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3296 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3297 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3298 }
e6a9dde5 3299 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3300 }
3301 }
3302}
3303
af1377b7
NC
3304#ifdef NOD_ENABLED
3305static void setupNODThread()
3306{
3307 if (g_nodEnabled) {
b78727c6
NC
3308 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3309 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3310 try {
3311 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3312 }
3313 catch (const PDNSException& e) {
3314 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3315 _exit(1);
3316 }
3317 if (!t_nodDBp->init()) {
3318 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3319 _exit(1);
3320 }
41c542ec
NC
3321 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
3322 t.detach();
3323 }
3324 if (g_udrEnabled) {
b78727c6
NC
3325 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3326 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3327 try {
3328 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3329 }
3330 catch (const PDNSException& e) {
3331 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3332 _exit(1);
3333 }
3334 if (!t_udrDBp->init()) {
3335 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3336 _exit(1);
3337 }
3338 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7
NC
3339 t.detach();
3340 }
3341}
3342
3343void parseNODWhitelist(const std::string& wlist)
3344{
3345 vector<string> parts;
3346 stringtok(parts, wlist, ",; ");
3347 for(const auto& a : parts) {
3348 g_nodDomainWL.add(DNSName(a));
3349 }
3350}
3351
3352static void setupNODGlobal()
3353{
3354 // Setup NOD subsystem
3355 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3356 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3357 g_nodLog = ::arg().mustDo("new-domain-log");
3358 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3359
3360 // Setup Unique DNS Response subsystem
3361 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3362 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3363}
3364#endif /* NOD_ENABLED */
3365
d187038c 3366static int serviceMain(int argc, char*argv[])
18af64a8 3367{
e6a9dde5
PL
3368 g_log.setName(s_programname);
3369 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3370 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3371
3372 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3373 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3374 if(val >= 0)
e6a9dde5 3375 g_log.setFacility(val);
18af64a8 3376 else
e6a9dde5 3377 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3378 }
3379
ba1a571d 3380 showProductVersion();
3afde9b2 3381
06ea9015 3382 g_disthashseed=dns_random(0xffffffff);
3383
b7ef5828
PL
3384 checkLinuxIPv6Limits();
3385 try {
3386 vector<string> addrs;
3387 if(!::arg()["query-local-address6"].empty()) {
3388 SyncRes::s_doIPv6=true;
e6a9dde5 3389 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3390
3391 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3392 for(const string& addr : addrs) {
3393 g_localQueryAddresses6.push_back(ComboAddress(addr));
3394 }
3395 }
3396 else {
e6a9dde5 3397 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3398 }
3399 addrs.clear();
3400 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3401 for(const string& addr : addrs) {
3402 g_localQueryAddresses4.push_back(ComboAddress(addr));
3403 }
3404 }
3405 catch(std::exception& e) {
e6a9dde5 3406 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3407 exit(99);
3408 }
3409
e48c6b8a
PL
3410 // keep this ABOVE loadRecursorLuaConfig!
3411 if(::arg()["dnssec"]=="off")
3412 g_dnssecmode=DNSSECMode::Off;
3413 else if(::arg()["dnssec"]=="process-no-validate")
3414 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3415 else if(::arg()["dnssec"]=="process")
3416 g_dnssecmode=DNSSECMode::Process;
3417 else if(::arg()["dnssec"]=="validate")
3418 g_dnssecmode=DNSSECMode::ValidateAll;
3419 else if(::arg()["dnssec"]=="log-fail")
3420 g_dnssecmode=DNSSECMode::ValidateForLog;
3421 else {
e6a9dde5 3422 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3423 exit(1);
3424 }
3425
3426 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3427 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3428
a6f7f5fe 3429 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3430 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3431
3432 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3433 try {
e6ec15bf 3434 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3435 }
3436 catch (PDNSException &e) {
e6a9dde5 3437 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3438 exit(1);
3439 }
ad42489c 3440
18af64a8 3441 parseACLs();
92011b8f 3442 sortPublicSuffixList();
3443
eb5bae86 3444 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3445 vector<string> ips;
3446 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3447 ips.push_back("0.0.0.0");
3448 ips.push_back("::");
c36bc97a 3449
e6a9dde5 3450 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3451 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3452 SyncRes::addDontQuery(*i);
eb5bae86 3453 if(i!=ips.begin())
e6a9dde5
PL
3454 g_log<<Logger::Warning<<", ";
3455 g_log<<Logger::Warning<<*i;
eb5bae86 3456 }
e6a9dde5 3457 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3458 }
3459
f7c1d4e3 3460 g_quiet=::arg().mustDo("quiet");
3ddb9247 3461
b243ca3b 3462 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3463 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3464 if(g_weDistributeQueries) {
b243ca3b 3465 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3466 }
3ddb9247 3467
756e82cf 3468 setupDelegationOnly();
b33c2462 3469 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3470
77499b05
BH
3471 if(::arg()["trace"]=="fail") {
3472 SyncRes::setDefaultLogMode(SyncRes::Store);
3473 }
3474 else if(::arg().mustDo("trace")) {
3475 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3476 ::arg().set("quiet")="no";
3477 g_quiet=false;
3e9c6c0a 3478 g_dnssecLOG=true;
f7c1d4e3 3479 }
43a9b290
PL
3480 string myHostname = getHostname();
3481 if (myHostname == "UNKNOWN"){
3482 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3483 myHostname = "";
d0983bff 3484 }
3ddb9247 3485
aadceba8 3486 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3487
1051f8a9
BH
3488 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3489
f7c1d4e3 3490 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
63637fd8 3491 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3492 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3493 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3494 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3495 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3496 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3497 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3498 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3499 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3500 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3501 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3502 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3503 if(SyncRes::s_serverID.empty()) {
d0983bff 3504 SyncRes::s_serverID = myHostname;
f7c1d4e3 3505 }
3ddb9247 3506
e9f9b8ec
RG
3507 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3508 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3509
8a3a3822
RG
3510 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3511 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3512 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3513 }
3514 else {
3515 bool found = false;
3516 for (const auto& addr : g_localQueryAddresses4) {
3517 if (!IsAnyAddress(addr)) {
3518 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3519 found = true;
3520 break;
3521 }
3522 }
3523 if (!found) {
3524 for (const auto& addr : g_localQueryAddresses6) {
3525 if (!IsAnyAddress(addr)) {
3526 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3527 found = true;
3528 break;
3529 }
3530 }
3531 if (!found) {
3532 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3533 }
3534 }
3535 }
3536
2fe3354d
CH
3537 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3538 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3539 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3540
5cc8371b 3541 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3542 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3543
5b0ddd18 3544 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3545
49a699c4 3546 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3547
08f3f638 3548 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3549
f7c1d4e3 3550 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3551 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3552
3553 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3554 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3555
b3adda56
PD
3556 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3557
b243ca3b 3558 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3559 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3560 if (g_numWorkerThreads < 1) {
e6a9dde5 3561 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3562 g_numWorkerThreads = 1;
3563 }
3564
b243ca3b 3565 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3566 g_maxMThreads = ::arg().asNum("max-mthreads");
3567
00b8cadc
RG
3568 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3569
0ec489bf 3570 g_statisticsInterval = ::arg().asNum("statistics-interval");
3571
810ff705
RG
3572#ifdef SO_REUSEPORT
3573 g_reusePort = ::arg().mustDo("reuseport");
3574#endif
3575
b243ca3b 3576 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3577
b243ca3b
RG
3578 if (g_reusePort) {
3579 if (g_weDistributeQueries) {
3580 /* first thread is the handler, then distributors */
3581 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3582 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3583 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3584 makeUDPServerSockets(deferredAdds);
adb6cd72 3585 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3586 }
3587 }
3588 else {
3589 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3590 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3591 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3592 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3593 makeUDPServerSockets(deferredAdds);
adb6cd72 3594 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3595 }
810ff705
RG
3596 }
3597 }
3598 else {
c47f201b 3599 std::set<int> tcpSockets;
b243ca3b
RG
3600 /* we don't have reuseport so we can only open one socket per
3601 listening addr:port and everyone will listen on it */
3602 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3603 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3604
3605 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3606 needs to listen to the shared sockets */
3607 if (g_weDistributeQueries) {
3608 /* first thread is the handler, then distributors */
3609 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3610 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3611 }
3612 }
3613 else {
3614 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3615 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3616 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3617 }
3618 }
810ff705 3619 }
815099b2 3620
af1377b7
NC
3621#ifdef NOD_ENABLED
3622 // Setup newly observed domain globals
3623 setupNODGlobal();
3624#endif /* NOD_ENABLED */
3625
677e2a46
BH
3626 int forks;
3627 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3628 if(!fork()) // we are child
3629 break;
3630 }
3ddb9247 3631
f7c1d4e3 3632 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3633 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3634 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
3635 daemonize();
3636 }
3637 signal(SIGUSR1,usr1Handler);
3638 signal(SIGUSR2,usr2Handler);
3639 signal(SIGPIPE,SIG_IGN);
810ff705 3640
a6414fdc 3641 checkOrFixFDS();
3ddb9247 3642
d1b28475
KM
3643#ifdef HAVE_LIBSODIUM
3644 if (sodium_init() == -1) {
e6a9dde5 3645 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
3646 exit(99);
3647 }
3648#endif
3649
3afde9b2
PL
3650 openssl_thread_setup();
3651 openssl_seed();
e97cb679
AT
3652 /* setup rng before chroot */
3653 dns_random_init();
3afde9b2 3654
bdbb07e0 3655 if(::arg()["server-id"].empty()) {
d0983bff 3656 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
3657 }
3658
138435cb
BH
3659 int newgid=0;
3660 if(!::arg()["setgid"].empty())
3661 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3662 int newuid=0;
3663 if(!::arg()["setuid"].empty())
3664 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3665
f1d6a7ce
KM
3666 Utility::dropGroupPrivs(newuid, newgid);
3667
138435cb 3668 if (!::arg()["chroot"].empty()) {
75336810
PL
3669#ifdef HAVE_SYSTEMD
3670 char *ns;
3671 ns = getenv("NOTIFY_SOCKET");
3672 if (ns != nullptr) {
e6a9dde5 3673 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
3674 exit(1);
3675 }
3676#endif
138435cb 3677 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 3678 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
3679 exit(1);
3680 }
f0f3f0b0 3681 else
e6a9dde5 3682 g_log<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
3683 }
3684
f0f3f0b0
PL
3685 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3686 if(!s_pidfname.empty())
3687 unlink(s_pidfname.c_str()); // remove possible old pid file
3688 writePid();
3689
3690 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3691
f1d6a7ce 3692 Utility::dropUserPrivs(newuid);
c0063e60 3693
e6ec15bf
RG
3694 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3695
49a699c4 3696 makeThreadPipes();
3ddb9247 3697
5d4dd7fe
BH
3698 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3699 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 3700 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 3701 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 3702
d705aad9
RG
3703 if (::arg().mustDo("snmp-agent")) {
3704 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3705 g_snmpAgent->run();
3706 }
3707
b47026fd 3708 int port = ::arg().asNum("udp-source-port-min");
58da9034 3709 if(port < 1024 || port > 65535){
e6a9dde5 3710 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
3711 exit(99); // this isn't going to fix itself either
3712 }
3713 s_minUdpSourcePort = port;
b47026fd 3714 port = ::arg().asNum("udp-source-port-max");
58da9034 3715 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 3716 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
3717 exit(99); // this isn't going to fix itself either
3718 }
3719 s_maxUdpSourcePort = port;
3720 std::vector<string> parts {};
b47026fd 3721 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
3722 for (const auto &part : parts)
3723 {
3724 port = std::stoi(part);
58da9034 3725 if(port < 1024 || port > 65535){
e6a9dde5 3726 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
3727 exit(99); // this isn't going to fix itself either
3728 }
3729 s_avoidUdpSourcePorts.insert(port);
3730 }
3731
b243ca3b 3732 unsigned int currentThreadId = 1;
8fd25133 3733 const auto cpusMap = parseCPUMap();
d77abca1 3734
c3828c03 3735 if(g_numThreads == 1) {
e6a9dde5 3736 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
3737#ifdef HAVE_SYSTEMD
3738 sd_notify(0, "READY=1");
3739#endif
b243ca3b
RG
3740
3741 /* This thread handles the web server, carbon, statistics and the control channel */
3742 auto& handlerInfos = s_threadInfos.at(0);
3743 handlerInfos.isHandler = true;
c390b2da 3744 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
3745
3746 setCPUMap(cpusMap, currentThreadId, pthread_self());
3747
3748 auto& infos = s_threadInfos.at(currentThreadId);
3749 infos.isListener = true;
3750 infos.isWorker = true;
c390b2da 3751 recursorThread(currentThreadId++, "worker");
76698c6e
BH
3752 }
3753 else {
8fd25133 3754
b243ca3b
RG
3755 if (g_weDistributeQueries) {
3756 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
3757 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
3758 auto& infos = s_threadInfos.at(currentThreadId);
3759 infos.isListener = true;
c390b2da 3760 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
3761
3762 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3763 }
3764 }
8fd25133 3765
62b549e0
RG
3766 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
3767
b243ca3b
RG
3768 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
3769 auto& infos = s_threadInfos.at(currentThreadId);
3770 infos.isListener = g_weDistributeQueries ? false : true;
3771 infos.isWorker = true;
c390b2da 3772 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b
RG
3773
3774 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 3775 }
b243ca3b 3776
6b6720de
PL
3777#ifdef HAVE_SYSTEMD
3778 sd_notify(0, "READY=1");
3779#endif
b243ca3b
RG
3780
3781 /* This thread handles the web server, carbon, statistics and the control channel */
3782 auto& infos = s_threadInfos.at(0);
3783 infos.isHandler = true;
c390b2da 3784 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b
RG
3785
3786 s_threadInfos.at(0).thread.join();
bb4bdbaf 3787 }
bb4bdbaf
BH
3788 return 0;
3789}
3790
c390b2da 3791static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
3792try
3793{
d77abca1 3794 t_id=n;
b243ca3b 3795 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
3796
3797 static string threadPrefix = "pdns-r/";
519f5484 3798 setThreadName(threadPrefix + threadName);
c390b2da 3799
49a699c4 3800 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 3801 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 3802 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
3803 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3804 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 3805 primeHints();
3ddb9247 3806
f26bf547 3807 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 3808
aa7929a3 3809#ifdef HAVE_PROTOBUF
f26bf547 3810 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
aa7929a3 3811#endif
e6a9dde5 3812 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 3813
af1377b7 3814#ifdef NOD_ENABLED
41c542ec
NC
3815 if (threadInfo.isWorker)
3816 setupNODThread();
af1377b7
NC
3817#endif /* NOD_ENABLED */
3818
8fb594ba 3819 if(threadInfo.isWorker) {
5b388d28
PD
3820 try {
3821 if(!::arg()["lua-dns-script"].empty()) {
3822 t_pdl = std::make_shared<RecursorLua4>();
3823 t_pdl->loadFile(::arg()["lua-dns-script"]);
3824 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3825 }
3826 }
3827 catch(std::exception &e) {
3828 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3829 _exit(99);
674cf0f6 3830 }
674cf0f6 3831 }
3ddb9247 3832
f8f243b0 3833 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 3834 if(ringsize) {
f26bf547 3835 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
3836 if(g_weDistributeQueries)
3837 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 3838 else
3ddb9247 3839 t_remotes->set_capacity(ringsize);
f26bf547 3840 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3841 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
3842 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3843 t_bogusremotes->set_capacity(ringsize);
f26bf547 3844 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3845 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 3846 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3847 t_timeouts->set_capacity(ringsize);
92011b8f 3848
f26bf547 3849 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3850 t_queryring->set_capacity(ringsize);
f26bf547 3851 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3852 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
3853 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3854 t_bogusqueryring->set_capacity(ringsize);
92011b8f 3855 }
3ddb9247 3856
f26bf547 3857 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3ddb9247 3858
63341e8d
RG
3859#ifdef HAVE_PROTOBUF
3860 /* start protobuf export threads if needed */
3861 auto luaconfsLocal = g_luaconfs.getLocal();
3862 checkProtobufExport(luaconfsLocal);
3863 checkOutgoingProtobufExport(luaconfsLocal);
3864#endif /* HAVE_PROTOBUF */
3865
bb4bdbaf
BH
3866 PacketID pident;
3867
3868 t_fdm=getMultiplexer();
d77abca1 3869
b243ca3b 3870 if(threadInfo.isHandler) {
d07bf7ff 3871 if(::arg().mustDo("webserver")) {
e6a9dde5 3872 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 3873 try {
1ce57618 3874 new RecursorWebServer(t_fdm);
8989097d
CH
3875 }
3876 catch(PDNSException &e) {
e6a9dde5 3877 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
3878 exit(99);
3879 }
f3d1d67b 3880 }
e6a9dde5 3881 g_log<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 3882 }
810ff705 3883 else {
d77abca1 3884
b243ca3b
RG
3885 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
3886 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
3887
3888 if (threadInfo.isListener) {
3889 if (g_reusePort) {
3890 /* then every listener has its own FDs */
3891 for(const auto deferred : threadInfo.deferredAdds) {
3892 t_fdm->addReadFD(deferred.first, deferred.second);
3893 }
810ff705 3894 }
b243ca3b
RG
3895 else {
3896 /* otherwise all listeners are listening on the same ones */
3897 for(const auto deferred : g_deferredAdds) {
3898 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
3899 }
3900 }
3901 }
810ff705 3902 }
3ddb9247 3903
b0b37121 3904 registerAllStats();
d77abca1 3905
b243ca3b 3906 if(threadInfo.isHandler) {
674cf0f6
BH
3907 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3908 }
1bc3c142 3909
f7c1d4e3 3910 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 3911
f7c1d4e3 3912 bool listenOnTCP(true);
49a699c4 3913
cb1523d1 3914 time_t last_stat = 0;
a2f87dd1 3915 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 3916 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 3917 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 3918 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 3919 for(;;) {
ac0e821b 3920 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 3921
3427fa8a
BH
3922 if(!(counter%500)) {
3923 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
3924 }
3925
d2392145 3926 if(!(counter%55)) {
d8f6d49f 3927 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 3928 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 3929
f7c1d4e3 3930 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 3931 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 3932 if(g_logCommonErrors)
e6a9dde5 3933 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 3934 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
3935 }
3936 }
3ddb9247 3937
f7c1d4e3
BH
3938 counter++;
3939
b243ca3b 3940 if(threadInfo.isHandler) {
cb1523d1
RG
3941 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
3942 doStats();
3943 last_stat = g_now.tv_sec;
3944 }
f7c1d4e3 3945
cb1523d1 3946 Utility::gettimeofday(&g_now, 0);
2c78bd57 3947
cb1523d1
RG
3948 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
3949 MT->makeThread(doCarbonDump, 0);
3950 last_carbon = g_now.tv_sec;
3951 }
2c78bd57 3952 }
2a0276a9 3953 if (t_pdl != nullptr) {
9adbe790 3954 // lua-dns-script directive is present, call the maintenance callback if needed
b243ca3b 3955 if (threadInfo.isWorker) {
2a0276a9
CHB
3956 // Only on threads processing queries
3957 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
3958 t_pdl->maintenance();
3959 last_lua_maintenance = g_now.tv_sec;
3960 }
9adbe790 3961 }
a2f87dd1 3962 }
2c78bd57 3963
bb4bdbaf 3964 t_fdm->run(&g_now);
3ea54bf0 3965 // 'run' updates g_now for us
f7c1d4e3 3966
b243ca3b 3967 if(threadInfo.isListener) {
5c889cf5 3968 if(listenOnTCP) {
c47f201b
RG
3969 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3970 for(const auto fd : threadInfo.tcpSockets) {
3971 t_fdm->removeReadFD(fd);
b243ca3b 3972 }
c47f201b
RG
3973 listenOnTCP=false;
3974 }
f7c1d4e3 3975 }
5c889cf5 3976 else {
c47f201b
RG
3977 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3978 for(const auto fd : threadInfo.tcpSockets) {
3979 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 3980 }
c47f201b
RG
3981 listenOnTCP=true;
3982 }
f7c1d4e3
BH
3983 }
3984 }
3985 }
3986}
3f81d239 3987catch(PDNSException &ae) {
e6a9dde5 3988 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
3989 return 0;
3990}
3991catch(std::exception &e) {
e6a9dde5 3992 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
3993 return 0;
3994}
3995catch(...) {
e6a9dde5 3996 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
3997 return 0;
3998}
3999
51e2144e 4000
3ddb9247 4001int main(int argc, char **argv)
288f4aa9 4002{
dbd23fc2
BH
4003 g_argc = argc;
4004 g_argv = argv;
5e3de507 4005 g_stats.startupTime=time(0);
3e135495 4006 versionSetProduct(ProductRecursor);
8a63d3ce 4007 reportBasicTypes();
0007c2e5 4008 reportOtherTypes();
ea634573 4009
22030c37 4010 int ret = EXIT_SUCCESS;
caa6eefa 4011
288f4aa9 4012 try {
f888311c 4013 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4014 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4015 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4016 ::arg().set("local-port","port to listen on")="53";
32252594 4017 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4018 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4019 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4020 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4021 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
d3f809bf 4022 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4023 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4024 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4025 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4026 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4027 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
4028 ::arg().set("chroot","switch to chroot jail")="";
4029 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4030 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 4031 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4032 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4033 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4034 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4035 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4036 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4037 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4038 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4039 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4040 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4041 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4042 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
cc08b5a9 4043 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4044 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4045 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
0ec489bf 4046 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4047 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4048 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4049 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4050 ::arg().set("socket-owner","Owner of socket")="";
4051 ::arg().set("socket-group","Group of socket")="";
4052 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4053
f0f3f0b0 4054 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
4055 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4056 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4057 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4058 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4059 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4060 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 4061 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4062 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 4063 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4064 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4065 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 4066 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4067 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4068 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4069 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4070 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4071 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4072 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4073 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4074 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4075 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4076 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4077 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4078 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4079 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4080 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4081 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4082 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 4083
5605c067 4084 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4085 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4086 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4087 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4088 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4089 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4090 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4091 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4092 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4093 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4094 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4095 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4096 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3f975863 4097 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4098 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4099 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4100 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4101 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4102 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4103 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4104 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4105 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
a09a8ce0 4106 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 4107 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 4108 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4109 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4110 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4111 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4112 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
a09a8ce0 4113
68e6df3c 4114 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4115 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4116
4117 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4118
d705aad9 4119 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4120 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4121
0735b17e 4122 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4123 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4124
8fd25133
RG
4125 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4126
98d36505
RG
4127 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4128
5cc8371b 4129 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4130 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4131
58da9034 4132 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4133 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4134 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4135 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
af1377b7
NC
4136#ifdef NOD_ENABLED
4137 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4138 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4139 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4140 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4141 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4142 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
41c542ec
NC
4143 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4144 ::arg().set("unique-response-log", "Log unique responses")="yes";
4145 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4146 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
af1377b7 4147#endif /* NOD_ENABLED */
2e3d8a19 4148 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4149 ::arg().setCmd("version","Print version string");
d5141417 4150 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 4151 g_log.toConsole(Logger::Info);
2e3d8a19 4152 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4153
2d733c0f
CH
4154 string configname=::arg()["config-dir"]+"/recursor.conf";
4155 if(::arg()["config-name"]!="") {
4156 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4157 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4158 }
4159 cleanSlashes(configname);
5124de27 4160
5cc1ea1d
CH
4161 if(!::arg().getCommands().empty()) {
4162 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4163 exit(99);
4164 }
4165
577cf284
BH
4166 if(::arg().mustDo("config")) {
4167 cout<<::arg().configstring()<<endl;
4168 exit(0);
4169 }
4170
3ddb9247 4171 if(!::arg().file(configname.c_str()))
e6a9dde5 4172 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4173
2e3d8a19 4174 ::arg().parse(argc,argv);
c836dc19 4175
2054afbb
CH
4176 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4177 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4178 exit(EXIT_FAILURE);
4179 }
4180
4181 if (::arg()["socket-dir"].empty()) {
4182 if (::arg()["chroot"].empty())
4183 ::arg().set("socket-dir") = LOCALSTATEDIR;
4184 else
4185 ::arg().set("socket-dir") = "/";
4186 }
4187
2e3d8a19 4188 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4189
b243ca3b
RG
4190 if(::arg().asNum("threads")==1) {
4191 if (::arg().mustDo("pdns-distributes-queries")) {
4192 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4193 ::arg().set("pdns-distributes-queries")="no";
4194 }
4195 }
4196
4197 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4198 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4199 ::arg().set("distributor-threads")="1";
4200 }
4201
4202 if (!::arg().mustDo("pdns-distributes-queries")) {
4203 ::arg().set("distributor-threads")="0";
4204 }
61d74169 4205
2e3d8a19 4206 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4207 cout<<"syntax:"<<endl<<endl;
4208 cout<<::arg().helpstring(::arg()["help"])<<endl;
4209 exit(0);
b636533b 4210 }
5e3de507 4211 if(::arg().mustDo("version")) {
ba1a571d 4212 showProductVersion();
3613a51c 4213 showBuildConfiguration();
67076869 4214 exit(0);
5e3de507 4215 }
b636533b 4216
34162f8f 4217 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4218
34162f8f
CH
4219 if (logUrgency < Logger::Error)
4220 logUrgency = Logger::Error;
f48d7b65 4221 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4222 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4223 }
e6a9dde5
PL
4224 g_log.setLoglevel(logUrgency);
4225 g_log.toConsole(logUrgency);
34162f8f 4226
f7c1d4e3 4227 serviceMain(argc, argv);
288f4aa9 4228 }
3f81d239 4229 catch(PDNSException &ae) {
e6a9dde5 4230 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4231 ret=EXIT_FAILURE;
288f4aa9 4232 }
fdbf35ac 4233 catch(std::exception &e) {
e6a9dde5 4234 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4235 ret=EXIT_FAILURE;
288f4aa9
BH
4236 }
4237 catch(...) {
e6a9dde5 4238 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4239 ret=EXIT_FAILURE;
288f4aa9 4240 }
3ddb9247 4241
22030c37 4242 return ret;
288f4aa9 4243}