]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Merge pull request #6634 from rgacogne/more-systemd-sandboxing
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
d6f3fcfa 91#include "pubsuffix.hh"
af1377b7
NC
92#ifdef NOD_ENABLED
93#include "nod.hh"
94#endif /* NOD_ENABLED */
f3c18728 95
d9d3f9c1 96#include "rec-protobuf.hh"
d705aad9 97#include "rec-snmp.hh"
aa7929a3 98
6b6720de
PL
99#ifdef HAVE_SYSTEMD
100#include <systemd/sd-daemon.h>
101#endif
102
d187038c
RG
103#include "namespaces.hh"
104
d61aa945
RG
105#ifdef HAVE_PROTOBUF
106#include "uuid-utils.hh"
107#endif
108
5cc8371b
RG
109#include "xpf.hh"
110
d187038c
RG
111typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
f26bf547 113static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 114static thread_local unsigned int t_id = 0;
f26bf547
RG
115static thread_local std::shared_ptr<Regex> t_traceRegex;
116static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 117#ifdef HAVE_PROTOBUF
3fe06137 118static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 119static thread_local uint64_t t_protobufServersGeneration;
3fe06137 120static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 121static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 122#endif /* HAVE_PROTOBUF */
f26bf547
RG
123
124thread_local std::unique_ptr<MT_t> MT; // the big MTasker
125thread_local std::unique_ptr<MemRecursorCache> t_RC;
126thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 127thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 128thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 129thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 130thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
131#ifdef NOD_ENABLED
132thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 133thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 134#endif /* NOD_ENABLED */
d187038c 135__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 136
b243ca3b
RG
137typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
138
d7dae798 139// for communicating with our threads
b243ca3b
RG
140// effectively readonly after startup
141struct RecThreadInfo
142{
143 struct ThreadPipeSet
144 {
145 int writeToThread{-1};
146 int readToThread{-1};
147 int writeFromThread{-1};
148 int readFromThread{-1};
149 int writeQueriesToThread{-1}; // this one is non-blocking
150 int readQueriesToThread{-1};
151 };
152
adb6cd72 153 /* FD corresponding to TCP sockets this thread is listening
c47f201b 154 on.
adb6cd72
RG
155 These FDs are also in deferredAdds when we have one
156 socket per listener, and in g_deferredAdds instead. */
157 std::set<int> tcpSockets;
b243ca3b
RG
158 /* FD corresponding to listening sockets if we have one socket per
159 listener (with reuseport), otherwise all listeners share the
160 same FD and g_deferredAdds is then used instead */
161 deferredAdd_t deferredAdds;
162 struct ThreadPipeSet pipes;
163 std::thread thread;
164 /* handle the web server, carbon, statistics and the control channel */
165 bool isHandler{false};
166 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
167 bool isListener{false};
168 /* process queries */
169 bool isWorker{false};
49a699c4 170};
810ff705 171
b243ca3b
RG
172/* first we have the handler thread, t_id == 0 (some other
173 helper threads like SNMP might have t_id == 0 as well)
174 then the distributor threads if any
175 and finally the workers */
176static std::vector<RecThreadInfo> s_threadInfos;
177/* without reuseport, all listeners share the same sockets */
178static deferredAdd_t g_deferredAdds;
faf580f5 179
d187038c
RG
180typedef vector<int> tcpListenSockets_t;
181typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 182
d187038c 183static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 184static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
185static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
186static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
187static AtomicCounter counter;
9065eb05 188static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 189static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 190static NetmaskGroup g_XPFAcl;
d187038c 191static size_t g_tcpMaxQueriesPerConn;
a5886e6a 192static size_t s_maxUDPQueriesPerRound;
d187038c
RG
193static uint64_t g_latencyStatSize;
194static uint32_t g_disthashseed;
195static unsigned int g_maxTCPPerClient;
d187038c 196static unsigned int g_maxMThreads;
b243ca3b 197static unsigned int g_numDistributorThreads;
d187038c
RG
198static unsigned int g_numWorkerThreads;
199static int g_tcpTimeout;
200static uint16_t g_udpTruncationThreshold;
59cb4a79 201static uint16_t g_xpfRRCode{0};
d187038c
RG
202static std::atomic<bool> statsWanted;
203static std::atomic<bool> g_quiet;
204static bool g_logCommonErrors;
205static bool g_anyToTcp;
b243ca3b 206static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 207static bool g_reusePort{false};
00b8cadc 208static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 209static time_t g_statisticsInterval;
9065eb05 210static bool g_useIncomingECS;
a6f7f5fe 211std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
212#ifdef NOD_ENABLED
213static bool g_nodEnabled;
214static DNSName g_nodLookupDomain;
215static bool g_nodLog;
216static SuffixMatchNode g_nodDomainWL;
ca2526f5 217static std::string g_nod_pbtag;
41c542ec
NC
218static bool g_udrEnabled;
219static bool g_udrLog;
ca2526f5 220static std::string g_udr_pbtag;
af1377b7 221#endif /* NOD_ENABLED */
f097141b 222#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 223static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
224#else
225static std::set<uint16_t> s_avoidUdpSourcePorts;
226#endif
bf6f28ca
CHB
227static uint16_t s_minUdpSourcePort;
228static uint16_t s_maxUdpSourcePort;
49a699c4 229
b243ca3b 230RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 231RecursorStats g_stats;
2d733c0f 232string s_programname="pdns_recursor";
d187038c 233string s_pidfname;
c1c29961 234bool g_lowercaseOutgoing;
bf19ccfd 235unsigned int g_networkTimeoutMsec;
d187038c
RG
236unsigned int g_numThreads;
237uint16_t g_outgoingEDNSBufsize;
98d36505 238bool g_logRPZChanges{false};
c3828c03 239
/* Loopback, private and link-local networks, and the same list with every
   entry negated. */
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
/* Bad nets, taken from both
     http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
   and
     http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
   where such a network may not be considered a valid destination. */
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
/* Concatenation of the two lists above. */
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 249
d7dae798 250//! used to send information to a newborn mthread
ea634573 251struct DNSComboWriter {
08b02366 252 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
253 {
254 }
5cc8371b 255
08b02366 256 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
257 {
258 }
259
5cc8371b
RG
260 void setRemote(const ComboAddress& sa)
261 {
262 d_remote=sa;
263 }
264
265 void setSource(const ComboAddress& sa)
ea634573 266 {
5cc8371b 267 d_source=sa;
ea634573
BH
268 }
269
b71b60ee 270 void setLocal(const ComboAddress& sa)
271 {
272 d_local=sa;
273 }
274
5cc8371b
RG
275 void setDestination(const ComboAddress& sa)
276 {
277 d_destination=sa;
278 }
b71b60ee 279
ea634573
BH
280 void setSocket(int sock)
281 {
282 d_socket=sock;
283 }
a1754c6a
BH
284
285 string getRemote() const
286 {
5cc8371b
RG
287 if (d_source == d_remote) {
288 return d_source.toStringWithPort();
289 }
290 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
291 }
292
5cc8371b 293 MOADNSParser d_mdp;
c9e9e5e0 294 struct timeval d_now;
5cc8371b
RG
295 /* Remote client, might differ from d_source
296 in case of XPF, in which case d_source holds
297 the IP of the client and d_remote of the proxy
298 */
299 ComboAddress d_remote;
300 ComboAddress d_source;
301 /* Destination address, might differ from
302 d_destination in case of XPF, in which case
303 d_destination holds the IP of the proxy and
304 d_local holds our own. */
305 ComboAddress d_local;
306 ComboAddress d_destination;
aa7929a3
RG
307#ifdef HAVE_PROTOBUF
308 boost::uuids::uuid d_uuid;
67e31ebe 309 string d_requestorId;
590388d2 310 string d_deviceId;
aa7929a3 311#endif
08b02366 312 std::string d_query;
5164bac3
RG
313 std::vector<std::string> d_policyTags;
314 LuaContext::LuaObject d_data;
b40562da 315 EDNSSubnetOpts d_ednssubnet;
5164bac3 316 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 317 int d_socket;
b673817a 318 unsigned int d_tag{0};
e9f63d47 319 uint32_t d_qhash{0};
70fb28d9 320 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
321 uint16_t d_ecsBegin{0};
322 uint16_t d_ecsEnd{0};
70fb28d9 323 bool d_variable{false};
5164bac3
RG
324 bool d_ecsFound{false};
325 bool d_ecsParsed{false};
326 bool d_tcp;
ea634573
BH
327};
328
06857845
RG
329MT_t* getMT()
330{
331 return MT ? MT.get() : nullptr;
332}
ea634573 333
288f4aa9
BH
334ArgvMap &arg()
335{
336 static ArgvMap theArg;
337 return theArg;
338}
4ef015cd 339
8fb594ba 340unsigned int getRecursorThreadId()
b4015453 341{
30da2030 342 return t_id;
b4015453 343}
09e6702a 344
30ee601a
RG
345int getMTaskerTID()
346{
347 return MT->getTid();
348}
349
b243ca3b
RG
350static bool isDistributorThread()
351{
352 if (t_id == 0) {
353 return false;
354 }
355
356 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
357}
358
359static bool isHandlerThread()
360{
361 if (t_id == 0) {
362 return true;
363 }
364
365 return s_threadInfos.at(t_id).isHandler;
366}
367
d187038c 368static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 369
50c81227 370// -1 is error, 0 is timeout, 1 is success
3ddb9247 371int asendtcp(const string& data, Socket* sock)
5c633640
BH
372{
373 PacketID pident;
374 pident.sock=sock;
375 pident.outMSG=data;
3ddb9247 376
bb4bdbaf 377 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 378 string packet;
5c633640 379
5b0ddd18 380 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 381
9170fbaf 382 if(!ret || ret==-1) { // timeout
bb4bdbaf 383 t_fdm->removeWriteFD(sock->getHandle());
5c633640 384 }
50c81227
BH
385 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
386 return -1;
387 }
9170fbaf 388 return ret;
5c633640
BH
389}
390
d187038c 391static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 392
9170fbaf 393// -1 is error, 0 is timeout, 1 is success
a683e8bd 394int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 395{
50c81227 396 data.clear();
5c633640
BH
397 PacketID pident;
398 pident.sock=sock;
399 pident.inNeeded=len;
825fa717 400 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 401 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 402
bb4bdbaf 403 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 404 if(!ret || ret==-1) { // timeout
bb4bdbaf 405 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 406 }
50c81227
BH
407 else if(data.empty()) {// error, EOF or other
408 return -1;
409 }
410
9170fbaf 411 return ret;
288f4aa9
BH
412}
413
d187038c 414static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 415{
fba1e944 416 PacketID pident=*any_cast<PacketID>(&var);
4465e941 417 char resp[512];
7c77ce63
RG
418 ComboAddress fromaddr;
419 socklen_t addrlen=sizeof(fromaddr);
420
421 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
422 if (fromaddr != pident.remote) {
e6a9dde5 423 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
424
425 }
426
4465e941 427 t_fdm->removeReadFD(fd);
428 if(ret >= 0) {
a683e8bd 429 string data(resp, (size_t) ret);
fba1e944 430 MT->sendEvent(pident, &data);
4465e941 431 }
432 else {
fba1e944 433 string empty;
434 MT->sendEvent(pident, &empty);
435 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 436 }
437}
fba1e944 438string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 439{
4465e941 440 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
441 s.setNonBlocking();
442 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
443
444 s.bind(local);
445 s.connect(dest);
4465e941 446 s.send(query);
447
448 PacketID pident;
449 pident.sock=&s;
7c77ce63 450 pident.remote=dest;
4465e941 451 pident.type=0;
fba1e944 452 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 453
454 string data;
fba1e944 455
4465e941 456 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 457
4465e941 458 if(!ret || ret==-1) { // timeout
4465e941 459 t_fdm->removeReadFD(s.getHandle());
460 }
461 else if(data.empty()) {// error, EOF or other
fba1e944 462 // we could special case this
4465e941 463 return data;
464 }
4465e941 465 return data;
466}
467
d7dae798 468//! pick a random query local address
1652a63e 469ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 470{
1652a63e 471 ComboAddress ret;
5a38281c 472 if(family==AF_INET) {
3ddb9247 473 if(g_localQueryAddresses4.empty())
1652a63e 474 ret = g_local4;
3ddb9247 475 else
1652a63e
BH
476 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
477 ret.sin4.sin_port = htons(port);
5a38281c
BH
478 }
479 else {
480 if(g_localQueryAddresses6.empty())
1652a63e
BH
481 ret = g_local6;
482 else
483 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 484
1652a63e 485 ret.sin6.sin6_port = htons(port);
5a38281c 486 }
1652a63e 487 return ret;
5a38281c 488}
4ef015cd 489
d187038c 490static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 491
d187038c 492static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
493{
494 uint32_t psize=0;
495 socklen_t len=sizeof(psize);
3ddb9247 496
d7dae798 497 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 498 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 499 return;
d7dae798
BH
500 }
501
502 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 503 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
504}
505
506
507static void setSocketReceiveBuffer(int fd, uint32_t size)
508{
509 setSocketBuffer(fd, SO_RCVBUF, size);
510}
511
512static void setSocketSendBuffer(int fd, uint32_t size)
513{
514 setSocketBuffer(fd, SO_SNDBUF, size);
515}
516
517
4ef015cd
BH
518// you can ask this class for a UDP socket to send a query from
519// this socket is not yours, don't even think about deleting it
520// but after you call 'returnSocket' on it, don't assume anything anymore
521class UDPClientSocks
522{
4ef015cd 523 unsigned int d_numsocks;
4ef015cd 524public:
e2642526 525 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
526 {
527 }
528
996c89cc 529 typedef set<int> socks_t;
4ef015cd
BH
530 socks_t d_socks;
531
2ee280cf 532 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 533 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 534 {
d8f6d49f
BH
535 *fd=makeClientSocket(toaddr.sin4.sin_family);
536 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 537 return -2;
d8f6d49f
BH
538
539 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
540 int err = errno;
41ff43f8 541 // returnSocket(*fd);
a7b68ae7
RG
542 try {
543 closesocket(*fd);
544 }
545 catch(const PDNSException& e) {
e6a9dde5 546 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
547 }
548
d8f6d49f 549 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 550 return -2;
998a4334 551 return -1;
d8f6d49f 552 }
998a4334 553
d8f6d49f 554 d_socks.insert(*fd);
998a4334 555 d_numsocks++;
d8f6d49f 556 return 0;
4ef015cd
BH
557 }
558
095c3045
BH
559 void returnSocket(int fd)
560 {
561 socks_t::iterator i=d_socks.find(fd);
34801ab1 562 if(i==d_socks.end()) {
335da0ba 563 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 564 }
bb4bdbaf 565 returnSocketLocked(i);
095c3045
BH
566 }
567
4ef015cd 568 // return a socket to the pool, or simply erase it
bb4bdbaf 569 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 570 {
600fc20b 571 if(i==d_socks.end()) {
3f81d239 572 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 573 }
80baf329 574 try {
bb4bdbaf 575 t_fdm->removeReadFD(*i);
80baf329
BH
576 }
577 catch(FDMultiplexerException& e) {
bb4bdbaf 578 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 579 }
a7b68ae7
RG
580 try {
581 closesocket(*i);
582 }
583 catch(const PDNSException& e) {
e6a9dde5 584 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 585 }
3ddb9247 586
998a4334
BH
587 d_socks.erase(i++);
588 --d_numsocks;
4ef015cd 589 }
d8f6d49f
BH
590
591 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 592 static int makeClientSocket(int family)
d8f6d49f 593 {
a683e8bd 594 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 595
d8f6d49f
BH
596 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
597 return ret;
3ddb9247
PD
598
599 if(ret<0)
335da0ba 600 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 601
7eb73ffa 602 // setCloseOnExec(ret); // we're not going to exec
5a38281c 603
d8f6d49f 604 int tries=10;
3aa91c3e 605 ComboAddress sin;
d8f6d49f 606 while(--tries) {
1652a63e 607 uint16_t port;
3ddb9247 608
d8f6d49f 609 if(tries==1) // fall back to kernel 'random'
4957a608 610 port = 0;
bf6f28ca
CHB
611 else {
612 do {
613 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
614 }
615 while (s_avoidUdpSourcePorts.count(port));
616 }
5a38281c 617
3aa91c3e 618 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 619
3ddb9247 620 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 621 break;
d8f6d49f
BH
622 }
623 if(!tries)
3aa91c3e 624 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 625
3897b9e1 626 setNonBlocking(ret);
d8f6d49f
BH
627 return ret;
628 }
49a699c4
BH
629};
630
f26bf547 631static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 632
288f4aa9 633/* these two functions are used by LWRes */
34801ab1 634// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 635int asendto(const char *data, size_t len, int flags,
3ddb9247 636 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 637{
34801ab1
BH
638
639 PacketID pident;
787e5eab
BH
640 pident.domain = domain;
641 pident.remote = toaddr;
642 pident.type = qtype;
34801ab1
BH
643
644 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
645 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
646
647 for(; chain.first != chain.second; chain.first++) {
648 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 649 /*
4665c31e
BH
650 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
651 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 652 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 653 */
34801ab1
BH
654 chain.first->key.chain.insert(id); // we can chain
655 *fd=-1; // gets used in waitEvent / sendEvent later on
656 return 1;
657 }
658 }
659
49a699c4 660 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
661 if(ret < 0)
662 return ret;
34801ab1 663
998a4334
BH
664 pident.fd=*fd;
665 pident.id=id;
3ddb9247 666
bb4bdbaf
BH
667 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
668 ret = send(*fd, data, len, 0);
669
5b0ddd18 670 int tmp = errno;
bb4bdbaf 671
7302ed0a 672 if(ret < 0)
49a699c4 673 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 674
5b0ddd18 675 errno = tmp; // this is for logging purposes only
7302ed0a 676 return ret;
288f4aa9
BH
677}
678
9170fbaf 679// -1 is error, 0 is timeout, 1 is success
f128d20d 680int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 681 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 682{
0d5f0a9f 683 static optional<unsigned int> nearMissLimit;
3ddb9247 684 if(!nearMissLimit)
0d5f0a9f
BH
685 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
686
288f4aa9 687 PacketID pident;
4ef015cd 688 pident.fd=fd;
288f4aa9 689 pident.id=id;
0d5f0a9f 690 pident.domain=domain;
787e5eab 691 pident.type = qtype;
996c89cc 692 pident.remote=fromaddr;
b636533b 693
5b0ddd18 694 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 695
9170fbaf 696 if(ret > 0) {
996c89cc 697 if(packet.empty()) // means "error"
3ddb9247 698 return -1;
998a4334 699
a683e8bd 700 *d_len=packet.size();
f128d20d 701
0d5f0a9f 702 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 703 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 704 g_stats.spoofCount++;
35ce8576
BH
705 return -1;
706 }
288f4aa9 707 }
09e6702a 708 else {
34801ab1 709 if(fd >= 0)
49a699c4 710 t_udpclientsocks->returnSocket(fd);
09e6702a 711 }
9170fbaf 712 return ret;
288f4aa9
BH
713}
714
88def049
BH
715static void writePid(void)
716{
191f2e47 717 if(!::arg().mustDo("write-pid"))
718 return;
18e7758c 719 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 720 if(of)
705f31ae 721 of<< Utility::getpid() <<endl;
88def049 722 else
e6a9dde5 723 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
724}
725
2749c3fe 726TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
727{
728 ++s_currentConnections;
cd989c87 729 (*t_tcpClientCounts)[d_remote]++;
0e408828 730}
cd989c87
BH
731
732TCPConnection::~TCPConnection()
0e408828 733{
a7b68ae7
RG
734 try {
735 if(closesocket(d_fd) < 0)
e6a9dde5 736 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
737 }
738 catch(const PDNSException& e) {
e6a9dde5 739 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
740 }
741
3ddb9247 742 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 743 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 744 --s_currentConnections;
0e408828 745}
0e9d9ce2 746
3ddb9247 747AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
748
749static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 750
92011b8f 751// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 752static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 753{
92011b8f 754 if(packetsize > 1000 && t_largeanswerremotes)
755 t_largeanswerremotes->push_back(remote);
2cc13433
BH
756 switch(res) {
757 case RCode::ServFail:
92011b8f 758 if(t_servfailremotes) {
759 t_servfailremotes->push_back(remote);
5af86fdc 760 if(query && t_servfailqueryring) // packet cache
92011b8f 761 t_servfailqueryring->push_back(make_pair(*query, qtype));
762 }
2cc13433
BH
763 g_stats.servFails++;
764 break;
765 case RCode::NXDomain:
766 g_stats.nxDomains++;
767 break;
768 case RCode::NoError:
769 g_stats.noErrors++;
770 break;
771 }
772}
773
9a864da4 774static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 775try
776{
5cc8371b 777 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 778}
779catch(...)
780{
781 return "Exception making error message for exception";
782}
783
aa7929a3 784#ifdef HAVE_PROTOBUF
b773359c 785static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 786{
b773359c
RG
787 if (!t_protobufServers) {
788 return;
789 }
790
e1c8a4bb
RG
791 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
792 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
793 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 794 message.setServerIdentity(SyncRes::s_serverID);
a94bc5d7 795 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 796 message.setRequestorId(requestorId);
590388d2 797 message.setDeviceId(deviceId);
02b47f43 798
02b47f43 799 if (!policyTags.empty()) {
d9d3f9c1 800 message.setPolicyTags(policyTags);
02b47f43 801 }
aa7929a3 802
d9d3f9c1 803// cerr <<message.toDebugString()<<endl;
aa7929a3 804 std::string str;
d9d3f9c1 805 message.serialize(str);
b773359c
RG
806
807 for (auto& server : *t_protobufServers) {
808 server->queueData(str);
809 }
aa7929a3
RG
810}
811
b773359c 812static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 813{
b773359c
RG
814 if (!t_protobufServers) {
815 return;
816 }
817
d9d3f9c1 818// cerr <<message.toDebugString()<<endl;
aa7929a3 819 std::string str;
d9d3f9c1 820 message.serialize(str);
b773359c
RG
821
822 for (auto& server : *t_protobufServers) {
823 server->queueData(str);
824 }
aa7929a3
RG
825}
826#endif
827
53508135
PL
828/**
829 * Chases the CNAME provided by the PolicyCustom RPZ policy.
830 *
831 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
832 * @param qtype: The QType of the original query
833 * @param sr: A SyncRes
834 * @param res: An integer that will contain the RCODE of the lookup we do
835 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
836 */
d187038c 837static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
838{
839 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
840 bool oldWantsRPZ = sr.getWantsRPZ();
841 sr.setWantsRPZ(false);
53508135 842 vector<DNSRecord> ans;
6da513b2 843 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
844 for (const auto& rec : ans) {
845 if(rec.d_place == DNSResourceRecord::ANSWER) {
846 ret.push_back(rec);
847 }
848 }
849 // Reset the RPZ state of the SyncRes
30ee601a 850 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
851 }
852}
853
70fb28d9 854static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 855{
70fb28d9 856 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
857
858 if(rec.d_type != QType::OPT) // their TTL ain't real
859 minTTL = min(minTTL, rec.d_ttl);
860
861 rec.d_content->toPacket(pw);
862 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
863 pw.rollback();
864 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
865 pw.getHeader()->tc=1;
866 pw.truncate();
867 }
868 return false;
869 }
870
871 return true;
872}
873
63341e8d 874#ifdef HAVE_PROTOBUF
3fe06137 875static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 876{
3fe06137 877 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
878
879 for (const auto& server : config.servers) {
880 try {
3fe06137 881 result->emplace_back(new RemoteLogger(server, config.timeout, config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
b773359c
RG
882 }
883 catch(const std::exception& e) {
884 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
885 }
886 catch(const PDNSException& e) {
887 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
888 }
63341e8d
RG
889 }
890
891 return result;
892}
893
894static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
895{
896 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
897 if (t_protobufServers) {
898 for (auto& server : *t_protobufServers) {
899 server->stop();
900 }
901 t_protobufServers.reset();
63341e8d
RG
902 }
903
904 return false;
905 }
906
907 /* if the server was not running, or if it was running according to a
908 previous configuration */
b773359c
RG
909 if (!t_protobufServers ||
910 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 911
b773359c
RG
912 if (t_protobufServers) {
913 for (auto& server : *t_protobufServers) {
914 server->stop();
915 }
63341e8d 916 }
b773359c 917 t_protobufServers.reset();
63341e8d 918
b773359c
RG
919 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
920 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
921 }
922
923 return true;
924}
925
926static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
927{
928 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
929 if (t_outgoingProtobufServers) {
930 for (auto& server : *t_outgoingProtobufServers) {
931 server->stop();
932 }
63341e8d 933 }
b773359c 934 t_outgoingProtobufServers.reset();
63341e8d
RG
935
936 return false;
937 }
938
939 /* if the server was not running, or if it was running according to a
940 previous configuration */
b773359c
RG
941 if (!t_outgoingProtobufServers ||
942 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 943
b773359c
RG
944 if (t_outgoingProtobufServers) {
945 for (auto& server : *t_outgoingProtobufServers) {
946 server->stop();
947 }
63341e8d 948 }
b773359c 949 t_outgoingProtobufServers.reset();
63341e8d 950
b773359c
RG
951 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
952 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
953 }
954
955 return true;
956}
957#endif /* HAVE_PROTOBUF */
958
af1377b7 959#ifdef NOD_ENABLED
41c542ec 960static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
961{
962 static const QType qt(QType::A);
963 static const uint16_t qc(QClass::IN);
41c542ec 964 bool ret = false;
af1377b7
NC
965 // First check the (sub)domain isn't whitelisted for NOD purposes
966 if (!g_nodDomainWL.check(dname)) {
967 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
968 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
969 if (g_nodLog) {
970 // This should probably log to a dedicated log file
971 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
972 }
973 if (!(g_nodLookupDomain.isRoot())) {
974 // Send a DNS A query to <domain>.g_nodLookupDomain
975 DNSName qname = dname;
976 vector<DNSRecord> dummy;
977 qname += g_nodLookupDomain;
978 directResolve(qname, qt, qc, dummy);
979 }
41c542ec 980 ret = true;
af1377b7
NC
981 }
982 }
41c542ec 983 return ret;
af1377b7
NC
984}
985
986static void nodAddDomain(const DNSName& dname)
987{
988 // Don't bother adding domains on the nod whitelist
989 if (!g_nodDomainWL.check(dname)) {
990 if (t_nodDBp) {
991 // This keeps the nod info up to date
992 t_nodDBp->addDomain(dname);
993 }
994 }
995}
41c542ec
NC
996
997static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
998{
999 bool ret = false;
1000 if (record.d_place == DNSResourceRecord::ANSWER ||
1001 record.d_place == DNSResourceRecord::ADDITIONAL) {
1002 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1003 std::stringstream ss;
1004 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1005 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1006 if (g_udrLog) {
1007 // This should also probably log to a dedicated file.
1008 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1009 }
1010 ret = true;
1011 }
1012 }
1013 return ret;
1014}
af1377b7
NC
1015#endif /* NOD_ENABLED */
1016
d187038c 1017static void startDoResolve(void *p)
288f4aa9 1018{
9a864da4 1019 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1020 try {
5af86fdc
RG
1021 if (t_queryring)
1022 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1023
32015748 1024 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1025 EDNSOpts edo;
5164bac3 1026 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1027 bool variableAnswer = dc->d_variable;
8e079f3a 1028 bool haveEDNS=false;
ca2526f5
NC
1029#ifdef NOD_ENABLED
1030 bool hasUDR = false;
1031#endif /* NOD_ENABLED */
f1db0de2
PL
1032 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1033 uint8_t ednsExtRCode = 0;
8e079f3a 1034 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1035 haveEDNS=true;
1036 if (edo.d_version != 0) {
1037 ednsExtRCode = ERCode::BADVERS;
1038 }
1039
32015748
RG
1040 if(!dc->d_tcp) {
1041 /* rfc6891 6.2.3:
1042 "Values lower than 512 MUST be treated as equal to 512."
1043 */
1044 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1045 }
5164bac3 1046 ednsOpts = edo.d_options;
8e079f3a 1047 haveEDNS=true;
3af35968 1048 maxanswersize -= 11; // EDNS header size
b40562da 1049
1f691b94
PL
1050 for (const auto& o : edo.d_options) {
1051 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1052 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1053 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1054 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1055 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1056 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1057 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1058 variableAnswer = true; // Can't packetcache an answer with NSID
1059 // Option Code and Option Length are both 2
1060 maxanswersize -= 2 + 2 + mode_server_id.size();
1061 }
b40562da
RG
1062 }
1063 }
10321a98 1064 }
b40562da
RG
1065 /* perhaps there was no EDNS or no ECS but by now we looked */
1066 dc->d_ecsParsed = true;
e325f20c 1067 vector<DNSRecord> ret;
ea634573 1068 vector<uint8_t> packet;
b23b8614 1069
ad42489c 1070 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1071 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1072 bool wantsRPZ(true);
1fbc6dc5 1073 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1074 bool logResponse = false;
aa7929a3 1075#ifdef HAVE_PROTOBUF
63341e8d 1076 if (checkProtobufExport(luaconfsLocal)) {
b773359c 1077 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1078 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1079 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
0bd2e252 1080 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1081 pbMessage->setServerIdentity(SyncRes::s_serverID);
d362f7c1 1082 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1083 }
1084#endif /* HAVE_PROTOBUF */
ad42489c 1085
3ddb9247 1086 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1087
1088 pw.getHeader()->aa=0;
1089 pw.getHeader()->ra=1;
c154c8a4 1090 pw.getHeader()->qr=1;
bb4bdbaf 1091 pw.getHeader()->tc=0;
ea634573 1092 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1093 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1094 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1095
70fb28d9
RG
1096 /* This is the lowest TTL seen in the records of the response,
1097 so we can't cache it for longer than this value.
1098 If we have a TTL cap, this value can't be larger than the
1099 cap no matter what. */
1100 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1101
1102 SyncRes sr(dc->d_now);
0c43f455 1103
2e921ec6 1104 bool DNSSECOK=false;
3457a2a0 1105 if(t_pdl) {
f26bf547 1106 sr.setLuaEngine(t_pdl);
3457a2a0 1107 }
9eec8c98 1108 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1109 sr.setDoDNSSEC(true);
9eec8c98
PL
1110
1111 // Does the requestor want DNSSEC records?
d6c335ab 1112 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1113 DNSSECOK=true;
1114 g_stats.dnssecQueries++;
1115 }
88c33dca
RG
1116 if (dc->d_mdp.d_header.cd) {
1117 /* Per rfc6840 section 5.9, "When processing a request with
1118 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1119 to return all response data, even data that has failed DNSSEC
1120 validation. */
1121 ++g_stats.dnssecCheckDisabledQueries;
1122 }
1123 if (dc->d_mdp.d_header.ad) {
1124 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1125 indicating that the requester understands and is interested in the
1126 value of the AD bit in the response. This allows a requester to
1127 indicate that it understands the AD bit without also requesting
1128 DNSSEC data via the DO bit. */
1129 ++g_stats.dnssecAuthenticDataQueries;
1130 }
9eec8c98
PL
1131 } else {
1132 // Ignore the client-set CD flag
1133 pw.getHeader()->cd=0;
5b9853c9 1134 }
0c43f455
RG
1135 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1136
4898a348 1137#ifdef HAVE_PROTOBUF
30ee601a 1138 sr.setInitialRequestId(dc->d_uuid);
b773359c 1139 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1140#endif
0c43f455 1141
2fe3354d 1142 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1143
904d3219 1144 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1145 bool shouldNotValidate = false;
904d3219 1146
ef3b6cd7
RG
1147 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1148 int res = RCode::NoError;
1f1ca368 1149 DNSFilterEngine::Policy appliedPolicy;
6da513b2 1150 std::vector<DNSRecord> spoofed;
f1c7929a 1151 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1152 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1153 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1154 dq.tag = dc->d_tag;
1155 dq.discardedPolicies = &sr.d_discardedPolicies;
1156 dq.policyTags = &dc->d_policyTags;
1157 dq.appliedPolicy = &appliedPolicy;
1158 dq.currentRecords = &ret;
1159 dq.dh = &dc->d_mdp.d_header;
05c74122 1160 dq.data = dc->d_data;
67e31ebe
RG
1161#ifdef HAVE_PROTOBUF
1162 dq.requestorId = dc->d_requestorId;
590388d2 1163 dq.deviceId = dc->d_deviceId;
67e31ebe 1164#endif
ba21fcfe 1165
6cf96227
PL
1166 if(ednsExtRCode != 0) {
1167 goto sendit;
1168 }
1169
e661a20b 1170 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1171 pw.getHeader()->tc = 1;
1172 res = 0;
1173 variableAnswer = true;
e661a20b
PD
1174 goto sendit;
1175 }
1176
f26bf547 1177 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1178 sr.setLogMode(SyncRes::Store);
1179 tracedQuery=true;
1180 }
3ddb9247 1181
8f7473d7 1182
976ec823 1183 if(!g_quiet || tracedQuery) {
e6a9dde5 1184 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1185 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1186 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1187 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1188 }
e6a9dde5 1189 g_log<<endl;
976ec823 1190 }
c75a6a9e 1191
fededf47 1192 sr.setId(MT->getTid());
67828389 1193 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1194 sr.setCacheOnly();
1195
f26bf547
RG
1196 if (t_pdl) {
1197 t_pdl->prerpz(dq, res);
0a273054
RG
1198 }
1199
db486de5 1200 // Check if the query has a policy attached to it
0a273054 1201 if (wantsRPZ) {
5cc8371b 1202 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1203 }
644dd1da 1204
54be222b 1205 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1206 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1207
30ee601a 1208 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1209 if(wantsRPZ) {
1210 switch(appliedPolicy.d_kind) {
1211 case DNSFilterEngine::PolicyKind::NoAction:
1212 break;
1213 case DNSFilterEngine::PolicyKind::Drop:
1214 g_stats.policyDrops++;
7a25883a 1215 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1216 return;
1217 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1218 g_stats.policyResults[appliedPolicy.d_kind]++;
1219 res=RCode::NXDomain;
1220 goto haveAnswer;
1221 case DNSFilterEngine::PolicyKind::NODATA:
1222 g_stats.policyResults[appliedPolicy.d_kind]++;
1223 res=RCode::NoError;
db486de5 1224 goto haveAnswer;
b8470add
PL
1225 case DNSFilterEngine::PolicyKind::Custom:
1226 g_stats.policyResults[appliedPolicy.d_kind]++;
1227 res=RCode::NoError;
6da513b2
RG
1228 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1229 for (const auto& dr : spoofed) {
1230 ret.push_back(dr);
1231 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1232 }
b8470add
PL
1233 goto haveAnswer;
1234 case DNSFilterEngine::PolicyKind::Truncate:
1235 if(!dc->d_tcp) {
1236 g_stats.policyResults[appliedPolicy.d_kind]++;
1237 res=RCode::NoError;
1238 pw.getHeader()->tc=1;
1239 goto haveAnswer;
1240 }
1241 break;
1242 }
db486de5
PL
1243 }
1244
b8470add 1245 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1246 try {
1247 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1248 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1249 }
1250 catch(ImmediateServFailException &e) {
854d44e3 1251 if(g_logCommonErrors)
e6a9dde5 1252 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1253 res = RCode::ServFail;
1254 }
4485aa35 1255
1921a4c2
RG
1256 dq.validationState = sr.getValidationState();
1257
b8470add
PL
1258 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1259 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1260 appliedPolicy = sr.d_appliedPolicy;
1261 g_stats.policyResults[appliedPolicy.d_kind]++;
1262 switch(appliedPolicy.d_kind) {
1263 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1264 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1265 case DNSFilterEngine::PolicyKind::Drop:
1266 g_stats.policyDrops++;
b8470add
PL
1267 return;
1268 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1269 ret.clear();
1270 res=RCode::NXDomain;
1271 goto haveAnswer;
1272
1273 case DNSFilterEngine::PolicyKind::NODATA:
1274 ret.clear();
1275 res=RCode::NoError;
1276 goto haveAnswer;
1277
1278 case DNSFilterEngine::PolicyKind::Truncate:
1279 if(!dc->d_tcp) {
1280 ret.clear();
1281 res=RCode::NoError;
1282 pw.getHeader()->tc=1;
1283 goto haveAnswer;
1284 }
1285 break;
1286
1287 case DNSFilterEngine::PolicyKind::Custom:
1288 ret.clear();
1289 res=RCode::NoError;
6da513b2
RG
1290 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1291 for (const auto& dr : spoofed) {
1292 ret.push_back(dr);
1293 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1294 }
b8470add
PL
1295 goto haveAnswer;
1296 }
1297 }
1298
1299 if (wantsRPZ) {
1f1ca368 1300 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1301 }
db486de5 1302
f26bf547 1303 if(t_pdl) {
db486de5
PL
1304 if(res == RCode::NoError) {
1305 auto i=ret.cbegin();
1306 for(; i!= ret.cend(); ++i)
1307 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1308 break;
f26bf547 1309 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1310 shouldNotValidate = true;
1311
db486de5 1312 }
f26bf547 1313 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1314 shouldNotValidate = true;
db486de5 1315
f26bf547 1316 if(t_pdl->postresolve(dq, res))
3ca4e735 1317 shouldNotValidate = true;
db486de5
PL
1318 }
1319
b8470add
PL
1320 if (wantsRPZ) { //XXX This block is repeated, see above
1321 g_stats.policyResults[appliedPolicy.d_kind]++;
1322 switch(appliedPolicy.d_kind) {
1323 case DNSFilterEngine::PolicyKind::NoAction:
1324 break;
1325 case DNSFilterEngine::PolicyKind::Drop:
1326 g_stats.policyDrops++;
b8470add
PL
1327 return;
1328 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1329 ret.clear();
1330 res=RCode::NXDomain;
1331 goto haveAnswer;
1332
1333 case DNSFilterEngine::PolicyKind::NODATA:
1334 ret.clear();
1335 res=RCode::NoError;
1336 goto haveAnswer;
1337
1338 case DNSFilterEngine::PolicyKind::Truncate:
1339 if(!dc->d_tcp) {
1340 ret.clear();
1341 res=RCode::NoError;
1342 pw.getHeader()->tc=1;
1343 goto haveAnswer;
1344 }
1345 break;
1346
1347 case DNSFilterEngine::PolicyKind::Custom:
1348 ret.clear();
1349 res=RCode::NoError;
6da513b2
RG
1350 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1351 for (const auto& dr : spoofed) {
1352 ret.push_back(dr);
1353 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1354 }
b8470add
PL
1355 goto haveAnswer;
1356 }
644dd1da 1357 }
4485aa35 1358 }
644dd1da 1359 haveAnswer:;
3e8216c8 1360 if(res == PolicyDecision::DROP) {
e9c2ad3a 1361 g_stats.policyDrops++;
ae7e77ad 1362 return;
3ddb9247 1363 }
9cdfab64 1364 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1365 {
85ffbc53
PD
1366 string trace(sr.getTrace());
1367 if(!trace.empty()) {
1368 vector<string> lines;
1369 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1370 for(const string& line : lines) {
85ffbc53 1371 if(!line.empty())
e6a9dde5 1372 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1373 }
1374 }
1375 }
3ddb9247 1376
9cdfab64 1377 if(res == -1) {
0fe1d080
PD
1378 pw.getHeader()->rcode=RCode::ServFail;
1379 // no commit here, because no record
1380 g_stats.servFails++;
1381 }
288f4aa9 1382 else {
ea634573 1383 pw.getHeader()->rcode=res;
92011b8f 1384
f3fe4ae6 1385 // Does the validation mode or query demand validation?
0c43f455 1386 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1387 try {
f3fe4ae6 1388 if(sr.doLog()) {
e6a9dde5 1389 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1390 }
4d2be65d
RG
1391
1392 auto state = sr.getValidationState();
1393
b25cae9a 1394 if(state == Secure) {
2e921ec6 1395 if(sr.doLog()) {
e6a9dde5 1396 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1397 }
b25cae9a 1398
1399 // Is the query source interested in the value of the ad-bit?
885c8881 1400 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1401 pw.getHeader()->ad=1;
1402 }
1403 else if(state == Insecure) {
f3fe4ae6 1404 if(sr.doLog()) {
e6a9dde5 1405 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1406 }
b25cae9a 1407
1408 pw.getHeader()->ad=0;
f3fe4ae6 1409 }
b25cae9a 1410 else if(state == Bogus) {
66f2e6ad
KM
1411 if(t_bogusremotes)
1412 t_bogusremotes->push_back(dc->d_source);
1413 if(t_bogusqueryring)
1414 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1415 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1416 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1417 }
1418
1419 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1420 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1421 if(sr.doLog()) {
e6a9dde5 1422 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1423 }
1424
1425 pw.getHeader()->rcode=RCode::ServFail;
1426 goto sendit;
1427 } else {
1428 if(sr.doLog()) {
e6a9dde5 1429 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1430 }
1431 }
1432 }
1433 }
1434 catch(ImmediateServFailException &e) {
1435 if(g_logCommonErrors)
e6a9dde5 1436 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1437 pw.getHeader()->rcode=RCode::ServFail;
1438 goto sendit;
f3fe4ae6 1439 }
b3f0ed10 1440 }
1441
c154c8a4 1442 if(ret.size()) {
92476c8b 1443 orderAndShuffle(ret);
5cc8371b 1444 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1445 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1446 variableAnswer=true;
1447 }
8e079f3a 1448 }
0afa32d4
RG
1449
1450 bool needCommit = false;
8e079f3a 1451 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1452 if( ! DNSSECOK &&
1453 ( i->d_type == QType::NSEC3 ||
1454 (
1455 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1456 (
1457 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1458 i->d_place != DNSResourceRecord::ANSWER
1459 )
1460 )
1461 )
1462 ) {
2e921ec6 1463 continue;
3e80ebce
KM
1464 }
1465
70fb28d9 1466 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1467 needCommit = false;
1468 break;
1469 }
1470 needCommit = true;
1471
41c542ec
NC
1472#ifdef NOD_ENABLED
1473 bool udr = false;
1474 if (g_udrEnabled) {
1475 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1476 if (!hasUDR && udr)
1477 hasUDR = true;
41c542ec
NC
1478 }
1479#endif /* NOD ENABLED */
1480
aa7929a3 1481#ifdef HAVE_PROTOBUF
b773359c 1482 if (t_protobufServers) {
41c542ec
NC
1483#ifdef NOD_ENABLED
1484 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1485#else
0bd2e252 1486 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1487#endif /* NOD_ENABLED */
aa7929a3
RG
1488 }
1489#endif
ea634573 1490 }
0afa32d4 1491 if(needCommit)
8e079f3a 1492 pw.commit();
288f4aa9 1493 }
10321a98 1494 sendit:;
b3f0ed10 1495
a0ddd130 1496 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
9837850d 1497 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
5a7f99b4 1498 EDNSSubnetOpts eo;
1499 eo.source = dc->d_ednssubnet.source;
1500 ComboAddress sa;
1ef18cab 1501 sa.reset();
5a7f99b4 1502 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1503 eo.scope = Netmask(sa, 0);
1504
1505 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1506 }
1507
97c6d7e5
RG
1508 if (haveEDNS) {
1509 /* we try to add the EDNS OPT RR even for truncated answers,
1510 as rfc6891 states:
1511 "The minimal response MUST be the DNS header, question section, and an
1512 OPT record. This MUST also occur when a truncated response (using
1513 the DNS header's TC bit) is returned."
1514 */
9b60fb71 1515 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1516 pw.commit();
97c6d7e5
RG
1517 }
1518
79332bff 1519 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1520 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1521#ifdef NOD_ENABLED
1522 bool nod = false;
1523 if (g_nodEnabled) {
1524 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1525 nod = true;
1526 }
1527#endif /* NOD_ENABLED */
aa7929a3 1528#ifdef HAVE_PROTOBUF
b773359c 1529 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1530 pbMessage->setBytes(packet.size());
1531 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1532 if (appliedPolicy.d_name) {
d362f7c1
RG
1533 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1534 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1535 }
d362f7c1
RG
1536 pbMessage->setPolicyTags(dc->d_policyTags);
1537 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1538 pbMessage->setRequestorId(dq.requestorId);
1539 pbMessage->setDeviceId(dq.deviceId);
41c542ec
NC
1540#ifdef NOD_ENABLED
1541 if (g_nodEnabled) {
ca2526f5 1542 if (nod) {
41c542ec 1543 pbMessage->setNOD(true);
ca2526f5
NC
1544 pbMessage->addPolicyTag(g_nod_pbtag);
1545 }
1546 if (hasUDR) {
1547 pbMessage->addPolicyTag(g_udr_pbtag);
1548 }
41c542ec
NC
1549 }
1550#endif /* NOD_ENABLED */
b773359c 1551 protobufLogResponse(*pbMessage);
ac238ea7 1552#ifdef NOD_ENABLED
ca2526f5
NC
1553 if (g_nodEnabled) {
1554 pbMessage->setNOD(false);
1555 pbMessage->clearUDR();
1556 if (nod)
1557 pbMessage->removePolicyTag(g_nod_pbtag);
1558 if (hasUDR)
1559 pbMessage->removePolicyTag(g_udr_pbtag);
1560 }
ac238ea7 1561#endif /* NOD_ENABLED */
aa7929a3
RG
1562 }
1563#endif
ea634573 1564 if(!dc->d_tcp) {
b71b60ee 1565 struct msghdr msgh;
1566 struct iovec iov;
1567 char cbuf[256];
1568 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1569 msgh.msg_control=NULL;
1570
cbc03320 1571 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1572 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1573 }
cbc03320 1574 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1575 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1576
49dc532e 1577 if(variableAnswer || sr.wasVariable()) {
1ef18cab 1578 g_stats.variableResponses++;
49dc532e 1579 }
3762e821 1580 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1581 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1582 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1583 g_now.tv_sec,
76e2b9e3 1584 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1585 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1586 dq.validationState,
08b02366
RG
1587 dc->d_ecsBegin,
1588 dc->d_ecsEnd,
4b0bdd5f 1589 std::move(pbMessage));
1051f8a9 1590 }
3762e821 1591 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1592 }
9c495589
BH
1593 else {
1594 char buf[2];
ea634573
BH
1595 buf[0]=packet.size()/256;
1596 buf[1]=packet.size()%256;
feccc9fc 1597
c038218b 1598 Utility::iovec iov[2];
feccc9fc 1599
ea634573
BH
1600 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1601 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1602
dd079764 1603 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1604 bool hadError=true;
feccc9fc 1605
dd079764 1606 if(wret == 0)
e6a9dde5 1607 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1608 else if(wret < 0 )
e6a9dde5 1609 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1610 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1611 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1612 else
18af64a8 1613 hadError=false;
3ddb9247 1614
09e6702a 1615 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1616
09e6702a 1617 if(hadError) {
18af64a8 1618 // no need to remove us from FDM, we weren't there
c36bc97a 1619 dc->d_socket = -1;
09e6702a 1620 }
a6ae6414 1621 else {
fde296a3
RG
1622 dc->d_tcpConnection->queriesCount++;
1623 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1624 dc->d_socket = -1;
1625 }
1626 else {
1627 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1628 Utility::gettimeofday(&g_now, 0); // needs to be updated
1629 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1630 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1631 }
0e9d9ce2 1632 }
9c495589 1633 }
2c9119cd 1634 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1635 if(!g_quiet) {
e6a9dde5
PL
1636 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1637 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1638 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1639 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1640
1641 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1642 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1643 }
1644
e6a9dde5 1645 g_log<<endl;
2c9119cd 1646
c75a6a9e 1647 }
b23b8614 1648
f7b8cffa
RG
1649 if (sr.d_outqueries || sr.d_authzonequeries) {
1650 t_RC->cacheMisses++;
1651 }
1652 else {
1653 t_RC->cacheHits++;
1654 }
2c9119cd 1655
fe213470
BH
1656 if(spent < 0.001)
1657 g_stats.answers0_1++;
1658 else if(spent < 0.010)
1659 g_stats.answers1_10++;
1660 else if(spent < 0.1)
1661 g_stats.answers10_100++;
1662 else if(spent < 1.0)
1663 g_stats.answers100_1000++;
1664 else
1665 g_stats.answersSlow++;
1666
574af7ea 1667 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1668 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1669 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1670 // no worries, we do this for packet cache hits elsewhere
19178da9 1671
1672 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1673 if(ourtime < 1)
1674 g_stats.ourtime0_1++;
1675 else if(ourtime < 2)
1676 g_stats.ourtime1_2++;
1677 else if(ourtime < 4)
1678 g_stats.ourtime2_4++;
1679 else if(ourtime < 8)
1680 g_stats.ourtime4_8++;
1681 else if(ourtime < 16)
1682 g_stats.ourtime8_16++;
1683 else if(ourtime < 32)
1684 g_stats.ourtime16_32++;
1685 else {
1686 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1687 g_stats.ourtimeSlow++;
1688 }
042da1a1 1689 if(ourtime >= 0.0) {
1690 newLat=ourtime*1000; // usec
1691 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1692 }
c6d04bdc 1693 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1694 }
3f81d239 1695 catch(PDNSException &ae) {
e6a9dde5 1696 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1697 }
16ce7f18
JS
1698 catch(const MOADNSException &mde) {
1699 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1700 }
fdbf35ac 1701 catch(std::exception& e) {
e6a9dde5 1702 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1703
1704 // Luawrapper nests the exception from Lua, so we unnest it here
1705 try {
1706 std::rethrow_if_nested(e);
2010ac95 1707 } catch(const std::exception& ne) {
e6a9dde5 1708 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1709 } catch(...) {}
1710
e6a9dde5 1711 g_log<<endl;
c154c8a4 1712 }
288f4aa9 1713 catch(...) {
e6a9dde5 1714 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1715 }
3ddb9247 1716
ec6eacbc 1717 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1718}
1719
d187038c 1720static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1721{
2d733c0f 1722 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1723 if(processNum >= 0)
335da0ba 1724 sockname += "."+std::to_string(processNum);
677e2a46 1725 sockname+=".controlsocket";
41f7a068 1726 s_rcc.listen(sockname);
3ddb9247 1727
387de317
BH
1728 int sockowner = -1;
1729 int sockgroup = -1;
1730
1731 if (!::arg().isEmpty("socket-group"))
1732 sockgroup=::arg().asGid("socket-group");
1733 if (!::arg().isEmpty("socket-owner"))
1734 sockowner=::arg().asUid("socket-owner");
3ddb9247 1735
f838ad8d
BH
1736 if (sockgroup > -1 || sockowner > -1) {
1737 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1738 unixDie("Failed to chown control socket");
1739 }
1740 }
387de317
BH
1741
1742 // do mode change if socket-mode is given
1743 if(!::arg().isEmpty("socket-mode")) {
1744 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1745 if(chmod(sockname.c_str(), sockmode) < 0) {
1746 unixDie("Failed to chmod control socket");
1747 }
387de317 1748 }
1d5b3ce6
BH
1749}
1750
5cc8371b 1751static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1752 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1753 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1754{
59cb4a79 1755 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1756 const bool lookForECS = ednssubnet != nullptr;
1757 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1758 size_t questionLen = question.length();
1759 unsigned int consumed=0;
1760 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1761
1762 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1763 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1764 const uint16_t arcount = ntohs(dh->arcount);
1765
1766 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1767 if (question.at(pos) != 0) {
1768 /* not an OPT or a XPF, bye. */
1769 return;
1770 }
1771
1772 pos += 1;
1773 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1774 pos += sizeof(dnsrecordheader);
1775
1776 if (pos >= questionLen) {
1777 return;
1778 }
1779
02b47f43 1780 /* OPT root label (1) followed by type (2) */
5cc8371b 1781 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1782 if (!options) {
1783 char* ecsStart = nullptr;
1784 size_t ecsLen = 0;
5cc8371b
RG
1785 /* we need to pass the record len */
1786 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1787 if (res == 0 && ecsLen > 4) {
1788 EDNSSubnetOpts eso;
1789 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1790 *ednssubnet=eso;
5cc8371b 1791 foundECS = true;
00b8cadc
RG
1792 }
1793 }
1794 }
1795 else {
5cc8371b
RG
1796 /* we need to pass the record len */
1797 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1798 if (res == 0) {
1799 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1800 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1801 EDNSSubnetOpts eso;
29e6303a 1802 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1803 *ednssubnet=eso;
5cc8371b 1804 foundECS = true;
00b8cadc
RG
1805 }
1806 }
02b47f43
RG
1807 }
1808 }
1809 }
59cb4a79 1810 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1811 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1812 return;
1813 }
1814
1815 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1816 }
1817
1818 pos += ntohs(drh->d_clen);
02b47f43
RG
1819 }
1820}
1821
d187038c 1822static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1823{
cd989c87 1824 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1825
879b3f70 1826 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 1827 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1828 if(bytes==1)
667f7e60 1829 conn->state=TCPConnection::BYTE1;
3ddb9247 1830 if(bytes==2) {
a0aa4f64 1831 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1832 conn->data.resize(conn->qlen);
667f7e60
BH
1833 conn->bytesread=0;
1834 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1835 }
1836 if(!bytes || bytes < 0) {
bb4bdbaf 1837 t_fdm->removeReadFD(fd);
09e6702a
BH
1838 return;
1839 }
1840 }
667f7e60 1841 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 1842 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1843 if(bytes==1) {
667f7e60 1844 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1845 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1846 conn->data.resize(conn->qlen);
667f7e60 1847 conn->bytesread=0;
09e6702a
BH
1848 }
1849 if(!bytes || bytes < 0) {
1850 if(g_logCommonErrors)
e6a9dde5 1851 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1852 t_fdm->removeReadFD(fd);
09e6702a
BH
1853 return;
1854 }
1855 }
667f7e60 1856 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 1857 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
f9d67b41 1858 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1859 if(g_logCommonErrors) {
1860 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1861 }
bb4bdbaf 1862 t_fdm->removeReadFD(fd);
09e6702a
BH
1863 return;
1864 }
b841314c 1865 conn->bytesread+=(uint16_t)bytes;
667f7e60 1866 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1867 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1868
9a864da4 1869 std::unique_ptr<DNSComboWriter> dc;
09e6702a 1870 try {
9a864da4 1871 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 1872 }
16ce7f18 1873 catch(const MOADNSException &mde) {
3ddb9247 1874 g_stats.clientParseError++;
4957a608 1875 if(g_logCommonErrors)
e6a9dde5 1876 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1877 return;
09e6702a 1878 }
cd989c87
BH
1879 dc->d_tcpConnection = conn; // carry the torch
1880 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1881 dc->d_tcp=true;
5cc8371b
RG
1882 dc->setRemote(conn->d_remote);
1883 dc->setSource(conn->d_remote);
a6147cd2 1884 ComboAddress dest;
d38e2ba9 1885 dest.reset();
a6147cd2 1886 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1887 socklen_t len = dest.getSocklen();
1888 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1889 dc->setLocal(dest);
5cc8371b 1890 dc->setDestination(dest);
33dcceba
RG
1891 DNSName qname;
1892 uint16_t qtype=0;
1893 uint16_t qclass=0;
1894 bool needECS = false;
5cc8371b 1895 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1896 string requestorId;
590388d2 1897 string deviceId;
16bbc6e3 1898 bool logQuery = false;
aa7929a3 1899#ifdef HAVE_PROTOBUF
02b47f43 1900 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1901 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1902 needECS = true;
1903 }
b773359c 1904 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
33dcceba
RG
1905#endif
1906
70fb28d9 1907 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
1908
1909 try {
29e6303a 1910 EDNSOptionViewMap ednsOptions;
5cc8371b 1911 bool xpfFound = false;
b40562da 1912 dc->d_ecsParsed = true;
5cc8371b 1913 dc->d_ecsFound = false;
2749c3fe 1914 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
1915 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1916 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 1917
70fb28d9 1918 if(t_pdl) {
33dcceba 1919 try {
70fb28d9 1920 if (t_pdl->d_gettag_ffi) {
f1c7929a 1921 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
1922 }
1923 else if (t_pdl->d_gettag) {
1924 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1925 }
33dcceba 1926 }
70fb28d9 1927 catch(const std::exception& e) {
33dcceba 1928 if(g_logCommonErrors)
e6a9dde5 1929 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1930 }
1931 }
1932 }
70fb28d9 1933 catch(const std::exception& e)
33dcceba
RG
1934 {
1935 if(g_logCommonErrors)
e6a9dde5 1936 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1937 }
1938 }
f52177c3
RG
1939
1940 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1941
33dcceba 1942#ifdef HAVE_PROTOBUF
b773359c 1943 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 1944 dc->d_requestorId = requestorId;
590388d2 1945 dc->d_deviceId = deviceId;
d61aa945 1946 dc->d_uuid = getUniqueID();
4898a348 1947 }
02b47f43 1948
b773359c 1949 if(t_protobufServers) {
02b47f43 1950 try {
02b47f43 1951
845cbf4c 1952 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
b773359c 1953 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 1954 }
02b47f43
RG
1955 }
1956 catch(std::exception& e) {
1957 if(g_logCommonErrors)
e6a9dde5 1958 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
1959 }
1960 }
aa7929a3 1961#endif
5034517a
RG
1962 if(t_pdl) {
1963 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
1964 if(!g_quiet)
1965 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
1966 g_stats.policyDrops++;
1967 return;
1968 }
1969 }
1970
879b3f70 1971 if(dc->d_mdp.d_header.qr) {
048f5db6 1972 g_stats.ignoredCount++;
c0f9be19
RG
1973 if(g_logCommonErrors) {
1974 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1975 }
4957a608 1976 return;
879b3f70 1977 }
3abcdab2 1978 if(dc->d_mdp.d_header.opcode) {
048f5db6 1979 g_stats.ignoredCount++;
c0f9be19
RG
1980 if(g_logCommonErrors) {
1981 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1982 }
c0f9be19
RG
1983 return;
1984 }
1985 else if (dh->qdcount == 0) {
1986 g_stats.emptyQueriesCount++;
1987 if(g_logCommonErrors) {
1988 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
1989 }
3abcdab2
PD
1990 return;
1991 }
09e6702a 1992 else {
4957a608
BH
1993 ++g_stats.qcounter;
1994 ++g_stats.tcpqcounter;
9a864da4 1995 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
4957a608 1996 return;
09e6702a
BH
1997 }
1998 }
1999 }
2000}
2001
6dcd28c3 2002//! Handle new incoming TCP connection
d187038c 2003static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2004{
37d3f960 2005 ComboAddress addr;
09e6702a 2006 socklen_t addrlen=sizeof(addr);
a683e8bd 2007 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2008 if(newsock>=0) {
85c32340
BH
2009 if(MT->numProcesses() > g_maxMThreads) {
2010 g_stats.overCapacityDrops++;
a7b68ae7
RG
2011 try {
2012 closesocket(newsock);
2013 }
2014 catch(const PDNSException& e) {
e6a9dde5 2015 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2016 }
85c32340
BH
2017 return;
2018 }
2019
92011b8f 2020 if(t_remotes)
2021 t_remotes->push_back(addr);
49a699c4 2022 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 2023 if(!g_quiet)
e6a9dde5 2024 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2025
09e6702a 2026 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2027 try {
2028 closesocket(newsock);
2029 }
2030 catch(const PDNSException& e) {
e6a9dde5 2031 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2032 }
09e6702a
BH
2033 return;
2034 }
bd0289fc 2035 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2036 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2037 try {
2038 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2039 }
2040 catch(const PDNSException& e) {
e6a9dde5 2041 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2042 }
09e6702a
BH
2043 return;
2044 }
3ddb9247 2045
3897b9e1 2046 setNonBlocking(newsock);
f26bf547 2047 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 2048 tc->state=TCPConnection::BYTE0;
3ddb9247 2049
cd989c87 2050 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 2051
0bff046b 2052 struct timeval now;
c038218b 2053 Utility::gettimeofday(&now, 0);
cd989c87 2054 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
2055 }
2056}
3ddb9247 2057
d187038c 2058static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 2059{
183eb877 2060 gettimeofday(&g_now, 0);
b71b60ee 2061 struct timeval diff = g_now - tv;
2062 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2063
22cf1fda 2064 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 2065 g_stats.tooOldDrops++;
2066 return 0;
2067 }
2068
1bc3c142 2069 ++g_stats.qcounter;
d7f10541
BH
2070 if(fromaddr.sin4.sin_family==AF_INET6)
2071 g_stats.ipv6qcounter++;
1bc3c142
BH
2072
2073 string response;
93f0da94 2074 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2075 unsigned int ctag=0;
f57486f1 2076 uint32_t qhash = 0;
12aff2e5 2077 bool needECS = false;
5cc8371b 2078 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 2079 std::vector<std::string> policyTags;
5fd2577f 2080 LuaContext::LuaObject data;
5cc8371b
RG
2081 ComboAddress source = fromaddr;
2082 ComboAddress destination = destaddr;
67e31ebe 2083 string requestorId;
590388d2 2084 string deviceId;
16bbc6e3 2085 bool logQuery = false;
12aff2e5 2086#ifdef HAVE_PROTOBUF
02b47f43 2087 boost::uuids::uuid uniqueId;
02b47f43 2088 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2089 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2090 uniqueId = getUniqueID();
02b47f43 2091 needECS = true;
63341e8d 2092 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2093 uniqueId = getUniqueID();
02b47f43 2094 }
b773359c
RG
2095 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2096 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
12aff2e5 2097#endif
b40562da
RG
2098 EDNSSubnetOpts ednssubnet;
2099 bool ecsFound = false;
2100 bool ecsParsed = false;
08b02366
RG
2101 uint16_t ecsBegin = 0;
2102 uint16_t ecsEnd = 0;
70fb28d9
RG
2103 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2104 bool variable = false;
1bc3c142 2105 try {
02b47f43
RG
2106 DNSName qname;
2107 uint16_t qtype=0;
2108 uint16_t qclass=0;
1bc3c142 2109 uint32_t age;
c15ff3df 2110 bool qnameParsed=false;
8f7473d7 2111#ifdef MALLOC_TRACE
2112 /*
2113 static uint64_t last=0;
2114 if(!last)
2115 g_mtracer->clearAllocators();
2116 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2117 last=g_mtracer->getAllocs();
2118 cout<<g_mtracer->topAllocatorsString()<<endl;
2119 g_mtracer->clearAllocators();
2120 */
2121#endif
55a1378f 2122
70fb28d9 2123 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2124 try {
29e6303a 2125 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2126 bool xpfFound = false;
2127
2128 ecsFound = false;
2129
2130 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2131 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2132 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2133
c15ff3df
RG
2134 qnameParsed = true;
2135 ecsParsed = true;
12aff2e5 2136
70fb28d9 2137 if(t_pdl) {
12aff2e5 2138 try {
70fb28d9 2139 if (t_pdl->d_gettag_ffi) {
f1c7929a 2140 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
70fb28d9
RG
2141 }
2142 else if (t_pdl->d_gettag) {
2143 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2144 }
12aff2e5 2145 }
70fb28d9 2146 catch(const std::exception& e) {
12aff2e5 2147 if(g_logCommonErrors)
e6a9dde5 2148 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2149 }
8ea8c302 2150 }
b2eacd67 2151 }
70fb28d9 2152 catch(const std::exception& e)
b2eacd67 2153 {
2154 if(g_logCommonErrors)
e6a9dde5 2155 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2156 }
12ce523e 2157 }
3ddb9247 2158
02b47f43 2159 bool cacheHit = false;
1fbc6dc5 2160 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2161#ifdef HAVE_PROTOBUF
b773359c 2162 if (t_protobufServers) {
d362f7c1 2163 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2164 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2165 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
b773359c 2166 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
b790ef3d 2167 }
d9d3f9c1
RG
2168 }
2169#endif /* HAVE_PROTOBUF */
02b47f43 2170
70fb28d9
RG
2171 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2172 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2173 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2174 vState valState;
c15ff3df 2175 if (qnameParsed) {
08b02366 2176 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2177 }
2178 else {
08b02366 2179 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2180 }
2181
d9d3f9c1 2182 if (cacheHit) {
8467ec26
KM
2183 if(valState == Bogus) {
2184 if(t_bogusremotes)
2185 t_bogusremotes->push_back(source);
2186 if(t_bogusqueryring)
2187 t_bogusqueryring->push_back(make_pair(qname, qtype));
2188 }
2189
d9d3f9c1 2190#ifdef HAVE_PROTOBUF
b773359c 2191 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2192 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2193 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2194 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2195 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2196 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2197 pbMessage->setRequestorId(requestorId);
2198 pbMessage->setDeviceId(deviceId);
b773359c 2199 protobufLogResponse(*pbMessage);
02b47f43 2200 }
d9d3f9c1 2201#endif /* HAVE_PROTOBUF */
49a3500d 2202 if(!g_quiet)
e6a9dde5 2203 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2204
1bc3c142
BH
2205 g_stats.packetCacheHits++;
2206 SyncRes::s_queries++;
2207 ageDNSPacket(response, age);
b71b60ee 2208 struct msghdr msgh;
2209 struct iovec iov;
2210 char cbuf[256];
2211 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2212 msgh.msg_control=NULL;
2213
cbc03320 2214 if(g_fromtosockets.count(fd)) {
fbe2a2e0 2215 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 2216 }
cbc03320 2217 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2218 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2219
97bee66d 2220 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2221 struct dnsheader tmpdh;
2222 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2223 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2224 }
08f3f638 2225 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2226 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2227 return 0;
2228 }
3ddb9247 2229 }
1bc3c142 2230 catch(std::exception& e) {
e6a9dde5 2231 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2232 return 0;
2233 }
3ddb9247 2234
f26bf547 2235 if(t_pdl) {
5cc8371b 2236 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2237 if(!g_quiet)
e6a9dde5 2238 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2239 g_stats.policyDrops++;
2240 return 0;
2241 }
2242 }
2243
1bc3c142 2244 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2245 if(!g_quiet)
e6a9dde5 2246 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2247
1bc3c142
BH
2248 g_stats.overCapacityDrops++;
2249 return 0;
2250 }
3ddb9247 2251
9a864da4 2252 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
1bc3c142 2253 dc->setSocket(fd);
49a3500d 2254 dc->d_tag=ctag;
e9f63d47 2255 dc->d_qhash=qhash;
5cc8371b
RG
2256 dc->setRemote(fromaddr);
2257 dc->setSource(source);
b71b60ee 2258 dc->setLocal(destaddr);
5cc8371b 2259 dc->setDestination(destination);
1bc3c142 2260 dc->d_tcp=false;
b40562da
RG
2261 dc->d_ecsFound = ecsFound;
2262 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2263 dc->d_ecsBegin = ecsBegin;
2264 dc->d_ecsEnd = ecsEnd;
b40562da 2265 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2266 dc->d_ttlCap = ttlCap;
2267 dc->d_variable = variable;
aa7929a3 2268#ifdef HAVE_PROTOBUF
b773359c 2269 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2270 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2271 }
67e31ebe 2272 dc->d_requestorId = requestorId;
590388d2 2273 dc->d_deviceId = deviceId;
aa7929a3
RG
2274#endif
2275
9a864da4 2276 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2277 return 0;
3ddb9247
PD
2278}
2279
b71b60ee 2280
d187038c 2281static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2282{
a683e8bd 2283 ssize_t len;
12c2f2b9 2284 static const size_t maxIncomingQuerySize = 512;
04896b99 2285 static thread_local std::string data;
5db529f8 2286 ComboAddress fromaddr;
b71b60ee 2287 struct msghdr msgh;
2288 struct iovec iov;
2289 char cbuf[256];
390f1dab 2290 bool firstQuery = true;
b71b60ee 2291
c0a00acd
RG
2292 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2293 data.resize(maxIncomingQuerySize);
2294 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2295 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2296
c0a00acd 2297 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2298
c0a00acd 2299 firstQuery = false;
390f1dab 2300
c0a00acd
RG
2301 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2302 g_stats.ignoredCount++;
2303 if (!g_quiet) {
2304 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2305 }
2306 return;
04896b99 2307 }
04896b99 2308
c0a00acd
RG
2309 if (msgh.msg_flags & MSG_TRUNC) {
2310 g_stats.truncatedDrops++;
2311 if (!g_quiet) {
2312 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2313 }
2314 return;
ba892c7f 2315 }
b23b8614 2316
c0a00acd
RG
2317 if(t_remotes) {
2318 t_remotes->push_back(fromaddr);
2319 }
81859ba5 2320
c0a00acd
RG
2321 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2322 if(!g_quiet) {
2323 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2324 }
3ddb9247 2325
c0a00acd
RG
2326 g_stats.unauthorizedUDP++;
2327 return;
5db529f8 2328 }
c0a00acd
RG
2329 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2330 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2331 if(!g_quiet) {
2332 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2333 }
2334
2335 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2336 return;
3abcdab2 2337 }
c0a00acd
RG
2338
2339 try {
2340 data.resize(static_cast<size_t>(len));
2341 dnsheader* dh=(dnsheader*)&data[0];
2342
2343 if(dh->qr) {
2344 g_stats.ignoredCount++;
2345 if(g_logCommonErrors) {
2346 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2347 }
2348 }
2349 else if(dh->opcode) {
2350 g_stats.ignoredCount++;
2351 if(g_logCommonErrors) {
2352 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2353 }
a6147cd2 2354 }
c0f9be19
RG
2355 else if (dh->qdcount == 0) {
2356 g_stats.emptyQueriesCount++;
2357 if(g_logCommonErrors) {
2358 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2359 }
2360 }
a6147cd2 2361 else {
c0a00acd
RG
2362 struct timeval tv={0,0};
2363 HarvestTimestamp(&msgh, &tv);
2364 ComboAddress dest;
2365 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2366 auto loc = rplookup(g_listenSocketsAddresses, fd);
2367 if(HarvestDestinationAddress(&msgh, &dest)) {
2368 // but.. need to get port too
2369 if(loc) {
2370 dest.sin4.sin_port = loc->sin4.sin_port;
2371 }
a6147cd2 2372 }
2373 else {
c0a00acd
RG
2374 if(loc) {
2375 dest = *loc;
2376 }
2377 else {
2378 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2379 socklen_t slen = dest.getSocklen();
2380 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2381 }
2382 }
2383
2384 if(g_weDistributeQueries) {
2385 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2386 }
2387 else {
2388 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2389 }
2390 }
c0a00acd 2391 }
16ce7f18 2392 catch(const MOADNSException &mde) {
c0a00acd
RG
2393 g_stats.clientParseError++;
2394 if(g_logCommonErrors) {
2395 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2396 }
2397 }
2398 catch(const std::runtime_error& e) {
2399 g_stats.clientParseError++;
2400 if(g_logCommonErrors) {
2401 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2402 }
5db529f8
BH
2403 }
2404 }
c0a00acd
RG
2405 else {
2406 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2407 if(firstQuery && errno == EAGAIN) {
2408 g_stats.noPacketError++;
2409 }
390f1dab 2410
c0a00acd
RG
2411 break;
2412 }
ac0e821b 2413 }
5db529f8
BH
2414}
2415
adb6cd72 2416static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2417{
37d3f960 2418 int fd;
f28307ad 2419 vector<string>locals;
2e3d8a19 2420 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2421
f28307ad 2422 if(locals.empty())
3f81d239 2423 throw PDNSException("No local address specified");
3ddb9247 2424
f28307ad 2425 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2426 ServiceTuple st;
2427 st.port=::arg().asNum("local-port");
2428 parseService(*i, st);
3ddb9247 2429
32252594
BH
2430 ComboAddress sin;
2431
d38e2ba9 2432 sin.reset();
37d3f960 2433 sin.sin4.sin_family = AF_INET;
32252594 2434 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2435 sin.sin6.sin6_family = AF_INET6;
f71bc087 2436 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2437 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2438 }
2439
2440 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2441 if(fd<0)
3f81d239 2442 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2443
3897b9e1 2444 setCloseOnExec(fd);
a903b39c 2445
f28307ad 2446 int tmp=1;
810ff705 2447 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2448 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2449 exit(1);
f28307ad 2450 }
0dfa94ab 2451 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2452 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2453 }
2454
c8ddb7c2 2455#ifdef TCP_DEFER_ACCEPT
38ac0821 2456 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2457 if(i==locals.begin())
377602e3 2458 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2459 }
2460#endif
2461
fec7dd5a
SS
2462 if( ::arg().mustDo("non-local-bind") )
2463 Utility::setBindAny(AF_INET, fd);
2464
2332f42d 2465#ifdef SO_REUSEPORT
810ff705
RG
2466 if(g_reusePort) {
2467 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2468 throw PDNSException("SO_REUSEPORT: "+stringerror());
2469 }
2470#endif
2471
0735b17e
RG
2472 if (::arg().asNum("tcp-fast-open") > 0) {
2473#ifdef TCP_FASTOPEN
2474 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2475 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2476 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2477 }
2478#else
e6a9dde5 2479 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2480#endif
2481 }
2482
32252594 2483 sin.sin4.sin_port = htons(st.port);
a683e8bd 2484 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2485 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2486 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2487
3897b9e1 2488 setNonBlocking(fd);
49a699c4 2489 setSocketSendBuffer(fd, 65000);
37d3f960 2490 listen(fd, 128);
b243ca3b 2491 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2492 tcpSockets.insert(fd);
2493
84433b79 2494 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2495 // - fd is not that which we know here, but returned from accept()
3ddb9247 2496 if(sin.sin4.sin_family == AF_INET)
377602e3 2497 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2498 else
377602e3 2499 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2500 }
9c495589
BH
2501}
2502
b243ca3b 2503static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2504{
fec7dd5a 2505 int one=1;
f28307ad 2506 vector<string>locals;
2e3d8a19 2507 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2508
f28307ad 2509 if(locals.empty())
3f81d239 2510 throw PDNSException("No local address specified");
3ddb9247 2511
f28307ad 2512 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2513 ServiceTuple st;
2514 st.port=::arg().asNum("local-port");
2515 parseService(*i, st);
2516
37d3f960 2517 ComboAddress sin;
996c89cc 2518
d38e2ba9 2519 sin.reset();
37d3f960 2520 sin.sin4.sin_family = AF_INET;
32252594 2521 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2522 sin.sin6.sin6_family = AF_INET6;
f71bc087 2523 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2524 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2525 }
3ddb9247 2526
bb4bdbaf 2527 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2528 if(fd < 0) {
3f81d239 2529 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2530 }
915b0c39 2531 if (!setSocketTimestamps(fd))
e6a9dde5 2532 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2533
b71b60ee 2534 if(IsAnyAddress(sin)) {
cbc03320 2535 if(sin.sin4.sin_family == AF_INET)
2536 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2537 g_fromtosockets.insert(fd);
757d3179 2538#ifdef IPV6_RECVPKTINFO
cbc03320 2539 if(sin.sin4.sin_family == AF_INET6)
2540 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2541 g_fromtosockets.insert(fd);
757d3179 2542#endif
0dfa94ab 2543 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2544 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2545 }
b71b60ee 2546 }
fec7dd5a
SS
2547 if( ::arg().mustDo("non-local-bind") )
2548 Utility::setBindAny(AF_INET6, fd);
2549
3897b9e1 2550 setCloseOnExec(fd);
a903b39c 2551
4e9a20e6 2552 setSocketReceiveBuffer(fd, 250000);
32252594 2553 sin.sin4.sin_port = htons(st.port);
37d3f960 2554
2332f42d 2555
2573d4a6 2556#ifdef SO_REUSEPORT
810ff705 2557 if(g_reusePort) {
2332f42d 2558 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2559 throw PDNSException("SO_REUSEPORT: "+stringerror());
2560 }
2561#endif
a683e8bd 2562 socklen_t socklen=sin.getSocklen();
3ddb9247 2563 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2564 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2565
3897b9e1 2566 setNonBlocking(fd);
c2136bf0 2567
b243ca3b 2568 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2569 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2570 if(sin.sin4.sin_family == AF_INET)
377602e3 2571 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2572 else
377602e3 2573 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2574 }
c836dc19 2575}
caa6eefa 2576
d187038c 2577static void daemonize(void)
c836dc19
BH
2578{
2579 if(fork())
2580 exit(0); // bye bye
3ddb9247
PD
2581
2582 setsid();
c836dc19 2583
27a5ead5 2584 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2585 if(i < 0)
e6a9dde5 2586 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2587 else {
2588 dup2(i,0); /* stdin */
2589 dup2(i,1); /* stderr */
2590 dup2(i,2); /* stderr */
2591 close(i);
2592 }
288f4aa9 2593}
caa6eefa 2594
d187038c 2595static void usr1Handler(int)
c75a6a9e
BH
2596{
2597 statsWanted=true;
2598}
ae1b2e98 2599
d187038c 2600static void usr2Handler(int)
9170fbaf 2601{
f1f34cc2 2602 g_quiet= !g_quiet;
2603 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2604 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2605}
2606
d187038c 2607static void doStats(void)
c75a6a9e 2608{
16beeaa4
BH
2609 static time_t lastOutputTime;
2610 static uint64_t lastQueryCount;
d299d4f5 2611
2612 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2613 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2614
d299d4f5 2615 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2616 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2617 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2618 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2619 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2620
e6a9dde5 2621 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2622 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2623 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2624 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2625 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2626 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2627 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2628 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2629
e6a9dde5 2630 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2631 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2632
e6a9dde5 2633 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2634 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2635
16beeaa4
BH
2636 time_t now = time(0);
2637 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2638 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2639 }
2640 lastOutputTime = now;
2641 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2642 }
3ddb9247 2643 else if(statsWanted)
e6a9dde5 2644 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2645
c75a6a9e
BH
2646 statsWanted=false;
2647}
c836dc19 2648
29f0b1ce 2649static void houseKeeping(void *)
c836dc19 2650{
e4ae55e5 2651 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
3337c2f7
RG
2652 static thread_local int cleanCounter=0;
2653 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
2654 auto luaconfsLocal = g_luaconfs.getLocal();
2655
2656 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2657 // Loading the Lua config file already "refreshed" the TAs
2658 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2659 }
2660
cc59bce6 2661 try {
2662 if(s_running)
2663 return;
2664 s_running=true;
3ddb9247 2665
cc59bce6 2666 struct timeval now;
2667 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2668
2669 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 2670 DTime dt;
2671 dt.setTimeval(now);
a6f7f5fe 2672 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2673 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2674
a6f7f5fe 2675 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2676
cc59bce6 2677 if(!((cleanCounter++)%40)) { // this is a full scan!
2678 time_t limit=now.tv_sec-300;
a712cb56 2679 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2680 }
2681 last_prune=time(0);
d67620e4 2682 }
3ddb9247 2683
cc59bce6 2684 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2685 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2686 if (!res)
2687 last_rootupdate=now.tv_sec;
cc59bce6 2688 }
3ddb9247 2689
b243ca3b 2690 if(isHandlerThread()) {
3ddb9247 2691
cc59bce6 2692 if(now.tv_sec - last_secpoll >= 3600) {
2693 try {
2694 doSecPoll(&last_secpoll);
2695 }
581d4ea3 2696 catch(std::exception& e)
2697 {
e6a9dde5 2698 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2699 }
47e9b74f 2700 catch(PDNSException& e)
2701 {
e6a9dde5 2702 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2703 }
d0992a65
CH
2704 catch(ImmediateServFailException &e)
2705 {
e6a9dde5 2706 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2707 }
47e9b74f 2708 catch(...)
2709 {
e6a9dde5 2710 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2711 }
18b73338 2712 }
e4ae55e5
PL
2713
2714 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2715 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2716 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2717 try {
2718 map<DNSName, dsmap_t> dsAnchors;
2719 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2720 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2721 lci.dsAnchors = dsAnchors;
2722 });
2723 }
2724 last_trustAnchorUpdate = now.tv_sec;
2725 } catch (const PDNSException &pe) {
2726 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2727 }
2728 }
2729 s_running=false;
d67620e4 2730 }
2731 }
cc59bce6 2732 catch(PDNSException& ae)
2733 {
2734 s_running=false;
e6a9dde5 2735 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2736 throw;
2737 }
779828c4 2738}
d6d5dea7 2739
d187038c 2740static void makeThreadPipes()
49a699c4 2741{
b243ca3b
RG
2742 /* thread 0 is the handler / SNMP, we start at 1 */
2743 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2744 auto& threadInfos = s_threadInfos.at(n);
2745
49a699c4
BH
2746 int fd[2];
2747 if(pipe(fd) < 0)
2748 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2749
b243ca3b
RG
2750 threadInfos.pipes.readToThread = fd[0];
2751 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2752
49a699c4
BH
2753 if(pipe(fd) < 0)
2754 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2755
2756 threadInfos.pipes.readFromThread = fd[0];
2757 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2758
cf8cda18
RG
2759 if(pipe(fd) < 0)
2760 unixDie("Creating pipe for inter-thread communications");
d10307c5 2761
b243ca3b
RG
2762 threadInfos.pipes.readQueriesToThread = fd[0];
2763 threadInfos.pipes.writeQueriesToThread = fd[1];
2764
2765 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2766 unixDie("Making pipe for inter-thread communications non-blocking");
2767 }
49a699c4
BH
2768 }
2769}
2770
00c9b8c1
BH
2771struct ThreadMSG
2772{
2773 pipefunc_t func;
2774 bool wantAnswer;
2775};
2776
b4e76a18 2777void broadcastFunction(const pipefunc_t& func)
49a699c4 2778{
b243ca3b
RG
2779 /* This function might be called by the worker with t_id 0 during startup
2780 for the initialization of ACLs and domain maps. After that it should only
2781 be called by the handler. */
d77abca1 2782
b243ca3b
RG
2783 if (s_threadInfos.empty() && isHandlerThread()) {
2784 /* the handler and distributors will call themselves below, but
2785 during startup we get called while s_threadInfos has not been
2786 populated yet to update the ACL or domain maps, so we need to
2787 handle that case.
2788 */
2789 func();
2790 }
b4e76a18 2791
b243ca3b
RG
2792 unsigned int n = 0;
2793 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2794 if(n++ == t_id) {
b4e76a18 2795 func(); // don't write to ourselves!
49a699c4
BH
2796 continue;
2797 }
3ddb9247 2798
00c9b8c1
BH
2799 ThreadMSG* tmsg = new ThreadMSG();
2800 tmsg->func = func;
2801 tmsg->wantAnswer = true;
b243ca3b 2802 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2803 delete tmsg;
b243ca3b 2804
49a699c4 2805 unixDie("write to thread pipe returned wrong size or error");
b841314c 2806 }
3ddb9247 2807
49467864 2808 string* resp = nullptr;
b243ca3b 2809 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2810 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2811
49a699c4 2812 if(resp) {
49a699c4 2813 delete resp;
49467864 2814 resp = nullptr;
49a699c4
BH
2815 }
2816 }
2817}
06ea9015 2818
592d7ade 2819static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 2820{
b243ca3b
RG
2821 const auto& targetInfo = s_threadInfos[target];
2822 if(!targetInfo.isWorker) {
2823 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2824 exit(1);
00c9b8c1 2825 }
d77abca1 2826
b243ca3b 2827 const auto& tps = targetInfo.pipes;
3ddb9247 2828
cf8cda18
RG
2829 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2830 if (written > 0) {
2831 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2832 delete tmsg;
2833 unixDie("write to thread pipe returned wrong size or error");
2834 }
2835 }
2836 else {
2837 int error = errno;
cf8cda18 2838 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 2839 return false;
cf8cda18 2840 } else {
592d7ade 2841 delete tmsg;
17634427 2842 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2843 }
b841314c 2844 }
592d7ade
RG
2845
2846 return true;
2847}
2848
2849// This function is only called by the distributor threads, when pdns-distributes-queries is set
2850void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2851{
2852 if (!isDistributorThread()) {
2853 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2854 exit(1);
2855 }
2856
2857 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2858 unsigned int target = /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
2859
2860 ThreadMSG* tmsg = new ThreadMSG();
2861 tmsg->func = func;
2862 tmsg->wantAnswer = false;
2863
2864 if (!trySendingQueryToWorker(target, tmsg)) {
2865 /* if this function failed but did not raise an exception, it means that the pipe
2866 was full, let's try another one */
2867 unsigned int newTarget = 0;
2868 do {
2869 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
2870 } while (newTarget == target);
2871
2872 if (!trySendingQueryToWorker(newTarget, tmsg)) {
2873 g_stats.queryPipeFullDrops++;
2874 delete tmsg;
2875 }
2876 }
00c9b8c1 2877}
3427fa8a 2878
d187038c 2879static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2880{
f26bf547 2881 ThreadMSG* tmsg = nullptr;
3ddb9247 2882
cf8cda18 2883 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
2884 unixDie("read from thread pipe returned wrong size or error");
2885 }
3ddb9247 2886
2f22827a 2887 void *resp=0;
2888 try {
2889 resp = tmsg->func();
2890 }
2891 catch(std::exception& e) {
6d2010a8 2892 if(g_logCommonErrors)
e6a9dde5 2893 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2894 }
2895 catch(PDNSException& e) {
6d2010a8 2896 if(g_logCommonErrors)
e6a9dde5 2897 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2898 }
d7c676a5 2899 if(tmsg->wantAnswer) {
b243ca3b
RG
2900 const auto& threadInfo = s_threadInfos.at(t_id);
2901 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 2902 delete tmsg;
00c9b8c1 2903 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
2904 }
2905 }
3ddb9247 2906
00c9b8c1 2907 delete tmsg;
49a699c4 2908}
09e6702a 2909
13034931
BH
2910template<class T> void *voider(const boost::function<T*()>& func)
2911{
2912 return func();
2913}
2914
b3b5459d
BH
2915vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2916{
2917 a.insert(a.end(), b.begin(), b.end());
2918 return a;
2919}
2920
/* Appends all (name, count) pairs of b to the end of a and returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2926
3ddb9247
PD
2927vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2928{
2929 a.insert(a.end(), b.begin(), b.end());
2930 return a;
2931}
2932
92011b8f 2933
387b9ca6
RG
2934/*
2935 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
2936 reload the Lua script (not the Lua config) or change the current trace regex,
2937 and by the SNMP thread to gather metrics. */
b4e76a18 2938template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 2939{
b243ca3b 2940 if (!isHandlerThread()) {
788eeb4c 2941 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 2942 exit(1);
d77abca1
RG
2943 }
2944
b243ca3b 2945 unsigned int n = 0;
3427fa8a 2946 T ret=T();
b243ca3b
RG
2947 for (const auto& threadInfo : s_threadInfos) {
2948 if (n++ == t_id) {
2949 continue;
2950 }
2951
2952 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
2953 ThreadMSG* tmsg = new ThreadMSG();
2954 tmsg->func = boost::bind(voider<T>, func);
2955 tmsg->wantAnswer = true;
3ddb9247 2956
b841314c
RG
2957 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2958 delete tmsg;
3427fa8a 2959 unixDie("write to thread pipe returned wrong size or error");
b841314c 2960 }
3ddb9247 2961
49467864 2962 T* resp = nullptr;
3427fa8a
BH
2963 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2964 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2965
3427fa8a 2966 if(resp) {
3427fa8a
BH
2967 ret += *resp;
2968 delete resp;
49467864 2969 resp = nullptr;
3427fa8a
BH
2970 }
2971 }
2972 return ret;
2973}
2974
b4e76a18
RG
2975template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
2976template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
2977template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
2978template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3427fa8a 2979
d187038c 2980static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2981{
2982 string remote;
2983 string msg=s_rcc.recv(&remote);
2984 RecursorControlParser rcp;
2985 RecursorControlParser::func_t* command;
3ddb9247 2986
09e6702a 2987 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2988
2989 // If we are inside a chroot, we need to strip
2990 if (!arg()["chroot"].empty()) {
a683e8bd 2991 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2992 remote = remote.substr(len);
2993 }
2994
ab5c053d
BH
2995 try {
2996 s_rcc.send(answer, &remote);
2997 command();
2998 }
fdbf35ac 2999 catch(std::exception& e) {
e6a9dde5 3000 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3001 }
3f81d239 3002 catch(PDNSException& ae) {
e6a9dde5 3003 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3004 }
09e6702a
BH
3005}
3006
d187038c 3007static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3008{
0b18b22e 3009 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 3010 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3011
667f7e60 3012 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3013
a683e8bd 3014 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3015 if(ret > 0) {
667f7e60 3016 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3017 pident->inNeeded-=(size_t)ret;
825fa717 3018 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3019 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3020 PacketID pid=*pident;
3021 string msg=pident->inMSG;
3ddb9247 3022
bb4bdbaf 3023 t_fdm->removeReadFD(fd);
3ddb9247 3024 MT->sendEvent(pid, &msg);
09e6702a
BH
3025 }
3026 else {
667f7e60 3027 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3028 }
3029 }
3030 else {
667f7e60 3031 PacketID tmp=*pident;
bb4bdbaf 3032 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3033 string empty;
3034 MT->sendEvent(tmp, &empty); // this conveys error status
3035 }
3036}
3037
d187038c 3038static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3039{
0b18b22e 3040 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3041 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3042 if(ret > 0) {
a683e8bd 3043 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3044 if(pid->outPos==pid->outMSG.size()) {
3045 PacketID tmp=*pid;
bb4bdbaf 3046 t_fdm->removeWriteFD(fd);
09e6702a
BH
3047 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3048 }
3049 }
3050 else { // error or EOF
667f7e60 3051 PacketID tmp(*pid);
bb4bdbaf 3052 t_fdm->removeWriteFD(fd);
09e6702a 3053 string sent;
998a4334 3054 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3055 }
3056}
3057
34801ab1 3058// resend event to everybody chained onto it
d187038c 3059static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3060{
3061 if(iter->key.chain.empty())
3062 return;
e27e91a8 3063 // cerr<<"doResends called!\n";
34801ab1
BH
3064 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3065 resend.fd=-1;
3066 resend.id=*i;
e27e91a8 3067 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3068
34801ab1
BH
3069 MT->sendEvent(resend, &content);
3070 g_stats.chainResends++;
34801ab1
BH
3071 }
3072}
3073
d187038c 3074static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3075{
600fc20b 3076 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3077 ssize_t len;
fae8fe07
RG
3078 std::string packet;
3079 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3080 ComboAddress fromaddr;
09e6702a
BH
3081 socklen_t addrlen=sizeof(fromaddr);
3082
fae8fe07 3083 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3084
a683e8bd 3085 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3086 if(len < 0)
996c89cc 3087 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3088 else {
3ddb9247 3089 g_stats.serverParseError++;
09e6702a 3090 if(g_logCommonErrors)
e6a9dde5 3091 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3092 ": packet smaller than DNS header"<<endl;
998a4334 3093 }
34801ab1 3094
49a699c4 3095 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3096 string empty;
3097
3098 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3099 if(iter != MT->d_waiters.end())
34801ab1 3100 doResends(iter, pid, empty);
3ddb9247 3101
34801ab1 3102 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3103 return;
3ddb9247 3104 }
998a4334 3105
fae8fe07 3106 packet.resize(len);
998a4334 3107 dnsheader dh;
fae8fe07 3108 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3109
6da3b3ad
PD
3110 PacketID pident;
3111 pident.remote=fromaddr;
3112 pident.id=dh.id;
3113 pident.fd=fd;
34801ab1 3114
33a928af 3115 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3116 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3117 }
3118
3119 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3120 !dh.qr) { // one weird server
3121 pident.domain.clear();
3122 pident.type = 0;
3123 }
3124 else {
3125 try {
0b31e67e 3126 if(len > 12)
fae8fe07 3127 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3128 }
3129 catch(std::exception& e) {
3130 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3131 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3132 return;
34801ab1 3133 }
6da3b3ad 3134 }
34801ab1 3135
6da3b3ad
PD
3136 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3137 if(iter != MT->d_waiters.end()) {
3138 doResends(iter, pident, packet);
3139 }
c1da7976 3140
6da3b3ad 3141retryWithName:
4957a608 3142
6da3b3ad
PD
3143 if(!MT->sendEvent(pident, &packet)) {
3144 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3145 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3146 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3147 pident.domain == mthread->key.domain) {
6da3b3ad 3148 mthread->key.nearMisses++;
998a4334 3149 }
6da3b3ad
PD
3150
3151 // be a bit paranoid here since we're weakening our matching
3ddb9247 3152 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3153 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3154 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3155 pident.domain = mthread->key.domain;
3156 pident.type = mthread->key.type;
3157 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3158 }
09e6702a 3159 }
6da3b3ad
PD
3160 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3161 if(g_logCommonErrors) {
e6a9dde5 3162 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3163 }
09e6702a 3164 }
6da3b3ad
PD
3165 else if(fd >= 0) {
3166 t_udpclientsocks->returnSocket(fd);
3167 }
09e6702a
BH
3168}
3169
1f4abb20
BH
3170FDMultiplexer* getMultiplexer()
3171{
3172 FDMultiplexer* ret;
f26bf547 3173 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3174 try {
f26bf547 3175 ret=i.second();
1f4abb20
BH
3176 return ret;
3177 }
98d0ee4a 3178 catch(FDMultiplexerException &fe) {
e6a9dde5 3179 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3180 }
3181 catch(...) {
e6a9dde5 3182 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3183 }
1f4abb20 3184 }
e6a9dde5 3185 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3186 exit(1);
3187}
3188
3ddb9247 3189
d187038c 3190static string* doReloadLuaScript()
4485aa35 3191{
674cf0f6 3192 string fname= ::arg()["lua-dns-script"];
4485aa35 3193 try {
674cf0f6 3194 if(fname.empty()) {
f26bf547 3195 t_pdl.reset();
377602e3 3196 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3197 return new string("unloaded\n");
4485aa35
BH
3198 }
3199 else {
9694e14f
AT
3200 t_pdl = std::make_shared<RecursorLua4>();
3201 t_pdl->loadFile(fname);
4485aa35
BH
3202 }
3203 }
fdbf35ac 3204 catch(std::exception& e) {
e6a9dde5 3205 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3206 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3207 }
3ddb9247 3208
e6a9dde5 3209 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3210 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3211}
3212
49a699c4
BH
3213string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3214{
3ddb9247 3215 if(begin != end)
49a699c4 3216 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3217
0f39c1a3 3218 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3219}
49a699c4 3220
d187038c 3221static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3222try
3223{
3224 if(newRegex.empty()) {
f26bf547 3225 t_traceRegex.reset();
77499b05
BH
3226 return new string("unset\n");
3227 }
3228 else {
f26bf547 3229 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3230 return new string("ok\n");
3231 }
3232}
3f81d239 3233catch(PDNSException& ae)
77499b05
BH
3234{
3235 return new string(ae.reason+"\n");
3236}
3237
3238string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3239{
3240 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3241}
3242
4e9a20e6 3243static void checkLinuxIPv6Limits()
3244{
3245#ifdef __linux__
3246 string line;
3247 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3248 int lim=std::stoi(line);
4e9a20e6 3249 if(lim < 16384) {
e6a9dde5 3250 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3251 }
3252 }
3253#endif
3254}
36849ff2 3255static void checkOrFixFDS()
4e9a20e6 3256{
c0063e60 3257 unsigned int availFDs=getFilenumLimit();
3258 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3259
3260 if(wantFDs > availFDs) {
067ad20e 3261 unsigned int hardlimit= getFilenumLimit(true);
3262 if(hardlimit >= wantFDs) {
c0063e60 3263 setFilenumLimit(wantFDs);
e6a9dde5 3264 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3265 }
3266 else {
067ad20e 3267 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3268 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3269 g_maxMThreads = newval;
067ad20e 3270 setFilenumLimit(hardlimit);
36849ff2 3271 }
3272 }
4e9a20e6 3273}
77499b05 3274
c390b2da 3275static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3276
f26bf547 3277static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3278{
3279 t_allowFrom = ng;
f26bf547 3280 return nullptr;
49a699c4
BH
3281}
3282
dbd23fc2
BH
3283int g_argc;
3284char** g_argv;
3285
18af64a8 3286void parseACLs()
f7c1d4e3 3287{
18af64a8 3288 static bool l_initialized;
3ddb9247 3289
49a699c4 3290 if(l_initialized) { // only reload configuration file on second call
18af64a8 3291 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3292 if(::arg()["config-name"]!="") {
3293 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3294 }
18af64a8 3295 cleanSlashes(configname);
3ddb9247
PD
3296
3297 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3298 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3299 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3300 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3301 ::arg().preParse(g_argc, g_argv, "include-dir");
3302
3303 // then process includes
3304 std::vector<std::string> extraConfigs;
242b90e1
AT
3305 ::arg().gatherIncludes(extraConfigs);
3306
1dc8f4d0 3307 for(const std::string& fn : extraConfigs) {
7e818521 3308 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3309 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3310 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3311 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3312 }
ca2c884c
AT
3313
3314 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3315 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3316 }
49a699c4 3317
f26bf547
RG
3318 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3319 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3320
2c95fc65
BH
3321 if(!::arg()["allow-from-file"].empty()) {
3322 string line;
2c95fc65
BH
3323 ifstream ifs(::arg()["allow-from-file"].c_str());
3324 if(!ifs) {
9c61b9d0 3325 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3326 }
3327
3328 string::size_type pos;
3329 while(getline(ifs,line)) {
3330 pos=line.find('#');
3331 if(pos!=string::npos)
3332 line.resize(pos);
3333 trim(line);
3334 if(line.empty())
3335 continue;
3336
18af64a8 3337 allowFrom->addMask(line);
2c95fc65 3338 }
e6a9dde5 3339 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3340 }
3341 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3342 vector<string> ips;
3343 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3344
e6a9dde5 3345 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3346 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3347 allowFrom->addMask(*i);
f7c1d4e3 3348 if(i!=ips.begin())
e6a9dde5
PL
3349 g_log<<Logger::Warning<<", ";
3350 g_log<<Logger::Warning<<*i;
f7c1d4e3 3351 }
e6a9dde5 3352 g_log<<Logger::Warning<<endl;
f7c1d4e3 3353 }
49a699c4 3354 else {
3ddb9247 3355 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3356 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3357 allowFrom = nullptr;
49a699c4 3358 }
3ddb9247 3359
49a699c4 3360 g_initialAllowFrom = allowFrom;
d7dae798 3361 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3362 oldAllowFrom = nullptr;
3ddb9247 3363
49a699c4 3364 l_initialized = true;
18af64a8
BH
3365}
3366
795215f2 3367
756e82cf 3368static void setupDelegationOnly()
3369{
3370 vector<string> parts;
3371 stringtok(parts, ::arg()["delegation-only"], ", \t");
3372 for(const auto& p : parts) {
9065eb05 3373 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3374 }
3375}
795215f2 3376
8fd25133
RG
3377static std::map<unsigned int, std::set<int> > parseCPUMap()
3378{
3379 std::map<unsigned int, std::set<int> > result;
3380
3381 const std::string value = ::arg()["cpu-map"];
3382
3383 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3384 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3385 return result;
3386 }
3387
3388 std::vector<std::string> parts;
3389
3390 stringtok(parts, value, " \t");
3391
3392 for(const auto& part : parts) {
3393 if (part.find('=') == string::npos)
3394 continue;
3395
3396 try {
3397 auto headers = splitField(part, '=');
3398 trim(headers.first);
3399 trim(headers.second);
3400
3401 unsigned int threadId = pdns_stou(headers.first);
3402 std::vector<std::string> cpus;
3403
3404 stringtok(cpus, headers.second, ",");
3405
3406 for(const auto& cpu : cpus) {
3407 int cpuId = std::stoi(cpu);
3408
3409 result[threadId].insert(cpuId);
3410 }
3411 }
3412 catch(const std::exception& e) {
e6a9dde5 3413 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3414 }
3415 }
3416
3417 return result;
3418}
3419
3420static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3421{
3422 const auto& cpuMapping = cpusMap.find(n);
3423 if (cpuMapping != cpusMap.cend()) {
3424 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3425 if (rc == 0) {
e6a9dde5 3426 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3427 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3428 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3429 }
e6a9dde5 3430 g_log<<Logger::Info<<endl;
8fd25133
RG
3431 }
3432 else {
e6a9dde5 3433 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3434 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3435 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3436 }
e6a9dde5 3437 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3438 }
3439 }
3440}
3441
af1377b7
NC
3442#ifdef NOD_ENABLED
3443static void setupNODThread()
3444{
3445 if (g_nodEnabled) {
b78727c6
NC
3446 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3447 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3448 try {
3449 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3450 }
3451 catch (const PDNSException& e) {
3452 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3453 _exit(1);
3454 }
3455 if (!t_nodDBp->init()) {
3456 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3457 _exit(1);
3458 }
41c542ec 3459 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 3460 t.detach();
ca2526f5 3461 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
3462 }
3463 if (g_udrEnabled) {
b78727c6
NC
3464 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3465 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3466 try {
3467 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3468 }
3469 catch (const PDNSException& e) {
3470 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3471 _exit(1);
3472 }
3473 if (!t_udrDBp->init()) {
3474 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3475 _exit(1);
3476 }
3477 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 3478 t.detach();
ca2526f5 3479 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
3480 }
3481}
3482
3483void parseNODWhitelist(const std::string& wlist)
3484{
3485 vector<string> parts;
3486 stringtok(parts, wlist, ",; ");
3487 for(const auto& a : parts) {
3488 g_nodDomainWL.add(DNSName(a));
3489 }
3490}
3491
3492static void setupNODGlobal()
3493{
3494 // Setup NOD subsystem
3495 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3496 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3497 g_nodLog = ::arg().mustDo("new-domain-log");
3498 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3499
3500 // Setup Unique DNS Response subsystem
3501 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3502 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3503}
3504#endif /* NOD_ENABLED */
3505
d187038c 3506static int serviceMain(int argc, char*argv[])
18af64a8 3507{
e6a9dde5
PL
3508 g_log.setName(s_programname);
3509 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3510 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3511
3512 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3513 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3514 if(val >= 0)
e6a9dde5 3515 g_log.setFacility(val);
18af64a8 3516 else
e6a9dde5 3517 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3518 }
3519
ba1a571d 3520 showProductVersion();
3afde9b2 3521
06ea9015 3522 g_disthashseed=dns_random(0xffffffff);
3523
b7ef5828
PL
3524 checkLinuxIPv6Limits();
3525 try {
3526 vector<string> addrs;
3527 if(!::arg()["query-local-address6"].empty()) {
3528 SyncRes::s_doIPv6=true;
e6a9dde5 3529 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3530
3531 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3532 for(const string& addr : addrs) {
3533 g_localQueryAddresses6.push_back(ComboAddress(addr));
3534 }
3535 }
3536 else {
e6a9dde5 3537 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3538 }
3539 addrs.clear();
3540 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3541 for(const string& addr : addrs) {
3542 g_localQueryAddresses4.push_back(ComboAddress(addr));
3543 }
3544 }
3545 catch(std::exception& e) {
e6a9dde5 3546 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3547 exit(99);
3548 }
3549
e48c6b8a
PL
3550 // keep this ABOVE loadRecursorLuaConfig!
3551 if(::arg()["dnssec"]=="off")
3552 g_dnssecmode=DNSSECMode::Off;
3553 else if(::arg()["dnssec"]=="process-no-validate")
3554 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3555 else if(::arg()["dnssec"]=="process")
3556 g_dnssecmode=DNSSECMode::Process;
3557 else if(::arg()["dnssec"]=="validate")
3558 g_dnssecmode=DNSSECMode::ValidateAll;
3559 else if(::arg()["dnssec"]=="log-fail")
3560 g_dnssecmode=DNSSECMode::ValidateForLog;
3561 else {
e6a9dde5 3562 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3563 exit(1);
3564 }
3565
9a3ab3e4
KM
3566 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3567 if (g_signatureInceptionSkew < 0) {
3568 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3569 exit(1);
3570 }
3571
e48c6b8a 3572 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3573 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3574
a6f7f5fe 3575 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3576 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3577
3578 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3579 try {
e6ec15bf 3580 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3581 }
3582 catch (PDNSException &e) {
e6a9dde5 3583 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3584 exit(1);
3585 }
ad42489c 3586
18af64a8 3587 parseACLs();
d6f3fcfa 3588 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 3589
eb5bae86 3590 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3591 vector<string> ips;
3592 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3593 ips.push_back("0.0.0.0");
3594 ips.push_back("::");
c36bc97a 3595
e6a9dde5 3596 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3597 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3598 SyncRes::addDontQuery(*i);
eb5bae86 3599 if(i!=ips.begin())
e6a9dde5
PL
3600 g_log<<Logger::Warning<<", ";
3601 g_log<<Logger::Warning<<*i;
eb5bae86 3602 }
e6a9dde5 3603 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3604 }
3605
f7c1d4e3 3606 g_quiet=::arg().mustDo("quiet");
3ddb9247 3607
b243ca3b 3608 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3609 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3610 if(g_weDistributeQueries) {
b243ca3b 3611 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3612 }
3ddb9247 3613
756e82cf 3614 setupDelegationOnly();
b33c2462 3615 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3616
77499b05
BH
3617 if(::arg()["trace"]=="fail") {
3618 SyncRes::setDefaultLogMode(SyncRes::Store);
3619 }
3620 else if(::arg().mustDo("trace")) {
3621 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3622 ::arg().set("quiet")="no";
3623 g_quiet=false;
3e9c6c0a 3624 g_dnssecLOG=true;
f7c1d4e3 3625 }
43a9b290
PL
3626 string myHostname = getHostname();
3627 if (myHostname == "UNKNOWN"){
3628 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3629 myHostname = "";
d0983bff 3630 }
3ddb9247 3631
aadceba8 3632 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3633
1051f8a9
BH
3634 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3635
f7c1d4e3 3636 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
63637fd8 3637 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3638 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3639 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3640 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3641 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3642 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3643 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3644 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3645 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3646 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3647 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3648 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3649 if(SyncRes::s_serverID.empty()) {
d0983bff 3650 SyncRes::s_serverID = myHostname;
f7c1d4e3 3651 }
3ddb9247 3652
e9f9b8ec
RG
3653 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3654 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3655
8a3a3822
RG
3656 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3657 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3658 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3659 }
3660 else {
3661 bool found = false;
3662 for (const auto& addr : g_localQueryAddresses4) {
3663 if (!IsAnyAddress(addr)) {
3664 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3665 found = true;
3666 break;
3667 }
3668 }
3669 if (!found) {
3670 for (const auto& addr : g_localQueryAddresses6) {
3671 if (!IsAnyAddress(addr)) {
3672 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3673 found = true;
3674 break;
3675 }
3676 }
3677 if (!found) {
3678 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3679 }
3680 }
3681 }
3682
2fe3354d
CH
3683 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3684 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3685 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3686
5cc8371b 3687 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3688 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3689
5b0ddd18 3690 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3691
49a699c4 3692 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3693
08f3f638 3694 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3695
f7c1d4e3 3696 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3697 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3698
3699 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3700 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3701
b3adda56
PD
3702 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3703
b243ca3b 3704 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3705 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3706 if (g_numWorkerThreads < 1) {
e6a9dde5 3707 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3708 g_numWorkerThreads = 1;
3709 }
3710
b243ca3b 3711 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3712 g_maxMThreads = ::arg().asNum("max-mthreads");
3713
00b8cadc
RG
3714 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3715
0ec489bf 3716 g_statisticsInterval = ::arg().asNum("statistics-interval");
3717
810ff705
RG
3718#ifdef SO_REUSEPORT
3719 g_reusePort = ::arg().mustDo("reuseport");
3720#endif
3721
b243ca3b 3722 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3723
b243ca3b
RG
3724 if (g_reusePort) {
3725 if (g_weDistributeQueries) {
3726 /* first thread is the handler, then distributors */
3727 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3728 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3729 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3730 makeUDPServerSockets(deferredAdds);
adb6cd72 3731 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3732 }
3733 }
3734 else {
3735 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3736 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3737 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3738 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3739 makeUDPServerSockets(deferredAdds);
adb6cd72 3740 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3741 }
810ff705
RG
3742 }
3743 }
3744 else {
c47f201b 3745 std::set<int> tcpSockets;
b243ca3b
RG
3746 /* we don't have reuseport so we can only open one socket per
3747 listening addr:port and everyone will listen on it */
3748 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3749 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3750
3751 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3752 needs to listen to the shared sockets */
3753 if (g_weDistributeQueries) {
3754 /* first thread is the handler, then distributors */
3755 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3756 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3757 }
3758 }
3759 else {
3760 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3761 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3762 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3763 }
3764 }
810ff705 3765 }
815099b2 3766
af1377b7
NC
3767#ifdef NOD_ENABLED
3768 // Setup newly observed domain globals
3769 setupNODGlobal();
3770#endif /* NOD_ENABLED */
3771
677e2a46
BH
3772 int forks;
3773 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3774 if(!fork()) // we are child
3775 break;
3776 }
3ddb9247 3777
f7c1d4e3 3778 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3779 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3780 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
3781 daemonize();
3782 }
3783 signal(SIGUSR1,usr1Handler);
3784 signal(SIGUSR2,usr2Handler);
3785 signal(SIGPIPE,SIG_IGN);
810ff705 3786
a6414fdc 3787 checkOrFixFDS();
3ddb9247 3788
d1b28475
KM
3789#ifdef HAVE_LIBSODIUM
3790 if (sodium_init() == -1) {
e6a9dde5 3791 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
3792 exit(99);
3793 }
3794#endif
3795
3afde9b2
PL
3796 openssl_thread_setup();
3797 openssl_seed();
e97cb679
AT
3798 /* setup rng before chroot */
3799 dns_random_init();
3afde9b2 3800
bdbb07e0 3801 if(::arg()["server-id"].empty()) {
d0983bff 3802 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
3803 }
3804
138435cb
BH
3805 int newgid=0;
3806 if(!::arg()["setgid"].empty())
3807 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3808 int newuid=0;
3809 if(!::arg()["setuid"].empty())
3810 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3811
f1d6a7ce
KM
3812 Utility::dropGroupPrivs(newuid, newgid);
3813
138435cb 3814 if (!::arg()["chroot"].empty()) {
75336810
PL
3815#ifdef HAVE_SYSTEMD
3816 char *ns;
3817 ns = getenv("NOTIFY_SOCKET");
3818 if (ns != nullptr) {
e6a9dde5 3819 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
3820 exit(1);
3821 }
3822#endif
138435cb 3823 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 3824 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
3825 exit(1);
3826 }
f0f3f0b0 3827 else
377602e3 3828 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
3829 }
3830
f0f3f0b0
PL
3831 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3832 if(!s_pidfname.empty())
3833 unlink(s_pidfname.c_str()); // remove possible old pid file
3834 writePid();
3835
3836 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3837
f1d6a7ce 3838 Utility::dropUserPrivs(newuid);
1f2b341e
RG
3839 try {
3840 /* we might still have capabilities remaining, for example if we have been started as root
3841 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
3842 like CAP_NET_BIND_SERVICE.
3843 */
3844 dropCapabilities();
3845 }
3846 catch(const std::exception& e) {
3847 g_log<<Logger::Warning<<e.what()<<endl;
3848 }
c0063e60 3849
e6ec15bf
RG
3850 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3851
49a699c4 3852 makeThreadPipes();
3ddb9247 3853
5d4dd7fe
BH
3854 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3855 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 3856 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 3857 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 3858
d705aad9
RG
3859 if (::arg().mustDo("snmp-agent")) {
3860 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3861 g_snmpAgent->run();
3862 }
3863
b47026fd 3864 int port = ::arg().asNum("udp-source-port-min");
58da9034 3865 if(port < 1024 || port > 65535){
e6a9dde5 3866 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
3867 exit(99); // this isn't going to fix itself either
3868 }
3869 s_minUdpSourcePort = port;
b47026fd 3870 port = ::arg().asNum("udp-source-port-max");
58da9034 3871 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 3872 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
3873 exit(99); // this isn't going to fix itself either
3874 }
3875 s_maxUdpSourcePort = port;
3876 std::vector<string> parts {};
b47026fd 3877 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
3878 for (const auto &part : parts)
3879 {
3880 port = std::stoi(part);
58da9034 3881 if(port < 1024 || port > 65535){
e6a9dde5 3882 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
3883 exit(99); // this isn't going to fix itself either
3884 }
3885 s_avoidUdpSourcePorts.insert(port);
3886 }
3887
b243ca3b 3888 unsigned int currentThreadId = 1;
8fd25133 3889 const auto cpusMap = parseCPUMap();
d77abca1 3890
c3828c03 3891 if(g_numThreads == 1) {
e6a9dde5 3892 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
3893#ifdef HAVE_SYSTEMD
3894 sd_notify(0, "READY=1");
3895#endif
b243ca3b
RG
3896
3897 /* This thread handles the web server, carbon, statistics and the control channel */
3898 auto& handlerInfos = s_threadInfos.at(0);
3899 handlerInfos.isHandler = true;
c390b2da 3900 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
3901
3902 setCPUMap(cpusMap, currentThreadId, pthread_self());
3903
3904 auto& infos = s_threadInfos.at(currentThreadId);
3905 infos.isListener = true;
3906 infos.isWorker = true;
c390b2da 3907 recursorThread(currentThreadId++, "worker");
76698c6e
BH
3908 }
3909 else {
8fd25133 3910
b243ca3b
RG
3911 if (g_weDistributeQueries) {
3912 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
3913 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
3914 auto& infos = s_threadInfos.at(currentThreadId);
3915 infos.isListener = true;
c390b2da 3916 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
3917
3918 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3919 }
3920 }
8fd25133 3921
62b549e0
RG
3922 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
3923
b243ca3b
RG
3924 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
3925 auto& infos = s_threadInfos.at(currentThreadId);
3926 infos.isListener = g_weDistributeQueries ? false : true;
3927 infos.isWorker = true;
c390b2da 3928 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b
RG
3929
3930 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 3931 }
b243ca3b 3932
6b6720de
PL
3933#ifdef HAVE_SYSTEMD
3934 sd_notify(0, "READY=1");
3935#endif
b243ca3b
RG
3936
3937 /* This thread handles the web server, carbon, statistics and the control channel */
3938 auto& infos = s_threadInfos.at(0);
3939 infos.isHandler = true;
c390b2da 3940 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b
RG
3941
3942 s_threadInfos.at(0).thread.join();
bb4bdbaf 3943 }
bb4bdbaf
BH
3944 return 0;
3945}
3946
c390b2da 3947static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
3948try
3949{
d77abca1 3950 t_id=n;
b243ca3b 3951 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
3952
3953 static string threadPrefix = "pdns-r/";
519f5484 3954 setThreadName(threadPrefix + threadName);
c390b2da 3955
49a699c4 3956 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 3957 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 3958 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
3959 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3960 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 3961 primeHints();
3ddb9247 3962
f26bf547 3963 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 3964
e6a9dde5 3965 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 3966
af1377b7 3967#ifdef NOD_ENABLED
41c542ec
NC
3968 if (threadInfo.isWorker)
3969 setupNODThread();
af1377b7 3970#endif /* NOD_ENABLED */
c1751a59
RG
3971
3972 /* the listener threads handle TCP queries */
3973 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
3974 try {
3975 if(!::arg()["lua-dns-script"].empty()) {
3976 t_pdl = std::make_shared<RecursorLua4>();
3977 t_pdl->loadFile(::arg()["lua-dns-script"]);
3978 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3979 }
3980 }
3981 catch(std::exception &e) {
3982 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3983 _exit(99);
674cf0f6 3984 }
674cf0f6 3985 }
3ddb9247 3986
f8f243b0 3987 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 3988 if(ringsize) {
f26bf547 3989 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
3990 if(g_weDistributeQueries)
3991 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 3992 else
3ddb9247 3993 t_remotes->set_capacity(ringsize);
f26bf547 3994 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3995 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
3996 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3997 t_bogusremotes->set_capacity(ringsize);
f26bf547 3998 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3999 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 4000 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4001 t_timeouts->set_capacity(ringsize);
92011b8f 4002
f26bf547 4003 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4004 t_queryring->set_capacity(ringsize);
f26bf547 4005 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4006 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
4007 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4008 t_bogusqueryring->set_capacity(ringsize);
92011b8f 4009 }
3ddb9247 4010
f26bf547 4011 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3ddb9247 4012
63341e8d
RG
4013#ifdef HAVE_PROTOBUF
4014 /* start protobuf export threads if needed */
4015 auto luaconfsLocal = g_luaconfs.getLocal();
4016 checkProtobufExport(luaconfsLocal);
4017 checkOutgoingProtobufExport(luaconfsLocal);
4018#endif /* HAVE_PROTOBUF */
4019
bb4bdbaf
BH
4020 PacketID pident;
4021
4022 t_fdm=getMultiplexer();
d77abca1 4023
b243ca3b 4024 if(threadInfo.isHandler) {
d07bf7ff 4025 if(::arg().mustDo("webserver")) {
e6a9dde5 4026 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 4027 try {
1ce57618 4028 new RecursorWebServer(t_fdm);
8989097d
CH
4029 }
4030 catch(PDNSException &e) {
e6a9dde5 4031 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
4032 exit(99);
4033 }
f3d1d67b 4034 }
377602e3 4035 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 4036 }
810ff705 4037 else {
d77abca1 4038
b243ca3b
RG
4039 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4040 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4041
4042 if (threadInfo.isListener) {
4043 if (g_reusePort) {
4044 /* then every listener has its own FDs */
4045 for(const auto deferred : threadInfo.deferredAdds) {
4046 t_fdm->addReadFD(deferred.first, deferred.second);
4047 }
810ff705 4048 }
b243ca3b
RG
4049 else {
4050 /* otherwise all listeners are listening on the same ones */
4051 for(const auto deferred : g_deferredAdds) {
4052 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4053 }
4054 }
4055 }
810ff705 4056 }
3ddb9247 4057
b0b37121 4058 registerAllStats();
d77abca1 4059
b243ca3b 4060 if(threadInfo.isHandler) {
674cf0f6
BH
4061 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4062 }
1bc3c142 4063
f7c1d4e3 4064 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4065
f7c1d4e3 4066 bool listenOnTCP(true);
49a699c4 4067
cb1523d1 4068 time_t last_stat = 0;
a2f87dd1 4069 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4070 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4071 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 4072 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 4073 for(;;) {
ac0e821b 4074 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4075
3427fa8a
BH
4076 if(!(counter%500)) {
4077 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4078 }
4079
d2392145 4080 if(!(counter%55)) {
d8f6d49f 4081 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4082 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4083
f7c1d4e3 4084 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 4085 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4086 if(g_logCommonErrors)
e6a9dde5 4087 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4088 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4089 }
4090 }
3ddb9247 4091
f7c1d4e3
BH
4092 counter++;
4093
b243ca3b 4094 if(threadInfo.isHandler) {
cb1523d1
RG
4095 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4096 doStats();
4097 last_stat = g_now.tv_sec;
4098 }
f7c1d4e3 4099
cb1523d1 4100 Utility::gettimeofday(&g_now, 0);
2c78bd57 4101
cb1523d1
RG
4102 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4103 MT->makeThread(doCarbonDump, 0);
4104 last_carbon = g_now.tv_sec;
4105 }
2c78bd57 4106 }
2a0276a9 4107 if (t_pdl != nullptr) {
9adbe790 4108 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
4109 /* remember that the listener threads handle TCP queries */
4110 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
4111 // Only on threads processing queries
4112 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4113 t_pdl->maintenance();
4114 last_lua_maintenance = g_now.tv_sec;
4115 }
9adbe790 4116 }
a2f87dd1 4117 }
2c78bd57 4118
bb4bdbaf 4119 t_fdm->run(&g_now);
3ea54bf0 4120 // 'run' updates g_now for us
f7c1d4e3 4121
b243ca3b 4122 if(threadInfo.isListener) {
5c889cf5 4123 if(listenOnTCP) {
c47f201b
RG
4124 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4125 for(const auto fd : threadInfo.tcpSockets) {
4126 t_fdm->removeReadFD(fd);
b243ca3b 4127 }
c47f201b
RG
4128 listenOnTCP=false;
4129 }
f7c1d4e3 4130 }
5c889cf5 4131 else {
c47f201b
RG
4132 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4133 for(const auto fd : threadInfo.tcpSockets) {
4134 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4135 }
c47f201b
RG
4136 listenOnTCP=true;
4137 }
f7c1d4e3
BH
4138 }
4139 }
4140 }
4141}
3f81d239 4142catch(PDNSException &ae) {
e6a9dde5 4143 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4144 return 0;
4145}
4146catch(std::exception &e) {
e6a9dde5 4147 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4148 return 0;
4149}
4150catch(...) {
e6a9dde5 4151 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4152 return 0;
4153}
4154
51e2144e 4155
3ddb9247 4156int main(int argc, char **argv)
288f4aa9 4157{
dbd23fc2
BH
4158 g_argc = argc;
4159 g_argv = argv;
5e3de507 4160 g_stats.startupTime=time(0);
3e135495 4161 versionSetProduct(ProductRecursor);
8a63d3ce 4162 reportBasicTypes();
0007c2e5 4163 reportOtherTypes();
ea634573 4164
22030c37 4165 int ret = EXIT_SUCCESS;
caa6eefa 4166
288f4aa9 4167 try {
f888311c 4168 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4169 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4170 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4171 ::arg().set("local-port","port to listen on")="53";
32252594 4172 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4173 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4174 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4175 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4176 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4177 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4178 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4179 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4180 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4181 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4182 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4183 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
4184 ::arg().set("chroot","switch to chroot jail")="";
4185 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4186 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 4187 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4188 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4189 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4190 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4191 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4192 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4193 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4194 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4195 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4196 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4197 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4198 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
cc08b5a9 4199 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4200 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4201 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4202 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4203 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4204
0ec489bf 4205 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4206 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4207 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4208 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4209 ::arg().set("socket-owner","Owner of socket")="";
4210 ::arg().set("socket-group","Group of socket")="";
4211 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4212
f0f3f0b0 4213 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
4214 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4215 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4216 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4217 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4218 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4219 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 4220 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4221 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 4222 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4223 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4224 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 4225 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4226 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4227 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4228 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4229 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4230 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4231 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4232 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4233 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4234 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4235 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4236 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4237 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4238 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4239 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4240 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4241 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 4242
5605c067 4243 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4244 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4245 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4246 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4247 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4248 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4249 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4250 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4251 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4252 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4253 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4254 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4255 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3f975863 4256 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4257 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4258 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4259 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4260 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4261 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4262 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4263 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4264 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
4265 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4266 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 4267 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4268 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4269 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4270 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4271 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
a09a8ce0 4272
68e6df3c 4273 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4274 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4275
4276 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4277
d705aad9 4278 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4279 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4280
0735b17e 4281 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4282 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4283
8fd25133
RG
4284 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4285
98d36505
RG
4286 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4287
5cc8371b 4288 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4289 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4290
58da9034 4291 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4292 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4293 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4294 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 4295 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
af1377b7
NC
4296#ifdef NOD_ENABLED
4297 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4298 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4299 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4300 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4301 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4302 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4303 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
4304 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4305 ::arg().set("unique-response-log", "Log unique responses")="yes";
4306 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4307 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4308 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 4309#endif /* NOD_ENABLED */
2e3d8a19 4310 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4311 ::arg().setCmd("version","Print version string");
d5141417 4312 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 4313 g_log.toConsole(Logger::Info);
2e3d8a19 4314 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4315
2d733c0f
CH
4316 string configname=::arg()["config-dir"]+"/recursor.conf";
4317 if(::arg()["config-name"]!="") {
4318 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4319 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4320 }
4321 cleanSlashes(configname);
5124de27 4322
5cc1ea1d
CH
4323 if(!::arg().getCommands().empty()) {
4324 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4325 exit(99);
4326 }
4327
577cf284
BH
4328 if(::arg().mustDo("config")) {
4329 cout<<::arg().configstring()<<endl;
4330 exit(0);
4331 }
4332
3ddb9247 4333 if(!::arg().file(configname.c_str()))
e6a9dde5 4334 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4335
2e3d8a19 4336 ::arg().parse(argc,argv);
c836dc19 4337
2054afbb
CH
4338 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4339 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4340 exit(EXIT_FAILURE);
4341 }
4342
4343 if (::arg()["socket-dir"].empty()) {
4344 if (::arg()["chroot"].empty())
4345 ::arg().set("socket-dir") = LOCALSTATEDIR;
4346 else
4347 ::arg().set("socket-dir") = "/";
4348 }
4349
2e3d8a19 4350 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4351
b243ca3b
RG
4352 if(::arg().asNum("threads")==1) {
4353 if (::arg().mustDo("pdns-distributes-queries")) {
4354 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4355 ::arg().set("pdns-distributes-queries")="no";
4356 }
4357 }
4358
4359 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4360 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4361 ::arg().set("distributor-threads")="1";
4362 }
4363
4364 if (!::arg().mustDo("pdns-distributes-queries")) {
4365 ::arg().set("distributor-threads")="0";
4366 }
61d74169 4367
2e3d8a19 4368 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4369 cout<<"syntax:"<<endl<<endl;
4370 cout<<::arg().helpstring(::arg()["help"])<<endl;
4371 exit(0);
b636533b 4372 }
5e3de507 4373 if(::arg().mustDo("version")) {
ba1a571d 4374 showProductVersion();
3613a51c 4375 showBuildConfiguration();
67076869 4376 exit(0);
5e3de507 4377 }
b636533b 4378
34162f8f 4379 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4380
34162f8f
CH
4381 if (logUrgency < Logger::Error)
4382 logUrgency = Logger::Error;
f48d7b65 4383 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4384 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4385 }
e6a9dde5
PL
4386 g_log.setLoglevel(logUrgency);
4387 g_log.toConsole(logUrgency);
34162f8f 4388
f7c1d4e3 4389 serviceMain(argc, argv);
288f4aa9 4390 }
3f81d239 4391 catch(PDNSException &ae) {
e6a9dde5 4392 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4393 ret=EXIT_FAILURE;
288f4aa9 4394 }
fdbf35ac 4395 catch(std::exception &e) {
e6a9dde5 4396 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4397 ret=EXIT_FAILURE;
288f4aa9
BH
4398 }
4399 catch(...) {
e6a9dde5 4400 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4401 ret=EXIT_FAILURE;
288f4aa9 4402 }
3ddb9247 4403
22030c37 4404 return ret;
288f4aa9 4405}