]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
emit a /0 ECS response on non-variable answers when using use-incoming-ecs, for the...
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
d6f3fcfa 91#include "pubsuffix.hh"
af1377b7
NC
92#ifdef NOD_ENABLED
93#include "nod.hh"
94#endif /* NOD_ENABLED */
f3c18728 95
d9d3f9c1 96#include "rec-protobuf.hh"
d705aad9 97#include "rec-snmp.hh"
aa7929a3 98
6b6720de
PL
99#ifdef HAVE_SYSTEMD
100#include <systemd/sd-daemon.h>
101#endif
102
d187038c
RG
103#include "namespaces.hh"
104
5cc8371b
RG
105#include "xpf.hh"
106
d187038c
RG
107typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
108
f26bf547 109static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 110static thread_local unsigned int t_id = 0;
f26bf547
RG
111static thread_local std::shared_ptr<Regex> t_traceRegex;
112static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 113#ifdef HAVE_PROTOBUF
3fe06137 114static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 115static thread_local uint64_t t_protobufServersGeneration;
3fe06137 116static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 117static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 118#endif /* HAVE_PROTOBUF */
f26bf547
RG
119
120thread_local std::unique_ptr<MT_t> MT; // the big MTasker
121thread_local std::unique_ptr<MemRecursorCache> t_RC;
122thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 123thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 124thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 125thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 126thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
aa7929a3 127#ifdef HAVE_PROTOBUF
f26bf547 128thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
aa7929a3 129#endif
af1377b7
NC
130#ifdef NOD_ENABLED
131thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 132thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 133#endif /* NOD_ENABLED */
d187038c 134__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 135
b243ca3b
RG
136typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
137
d7dae798 138// for communicating with our threads
b243ca3b
RG
139// effectively readonly after startup
140struct RecThreadInfo
141{
142 struct ThreadPipeSet
143 {
144 int writeToThread{-1};
145 int readToThread{-1};
146 int writeFromThread{-1};
147 int readFromThread{-1};
148 int writeQueriesToThread{-1}; // this one is non-blocking
149 int readQueriesToThread{-1};
150 };
151
adb6cd72 152 /* FD corresponding to TCP sockets this thread is listening
c47f201b 153 on.
adb6cd72
RG
154 These FDs are also in deferredAdds when we have one
155 socket per listener, and in g_deferredAdds instead. */
156 std::set<int> tcpSockets;
b243ca3b
RG
157 /* FD corresponding to listening sockets if we have one socket per
158 listener (with reuseport), otherwise all listeners share the
159 same FD and g_deferredAdds is then used instead */
160 deferredAdd_t deferredAdds;
161 struct ThreadPipeSet pipes;
162 std::thread thread;
163 /* handle the web server, carbon, statistics and the control channel */
164 bool isHandler{false};
165 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
166 bool isListener{false};
167 /* process queries */
168 bool isWorker{false};
49a699c4 169};
810ff705 170
b243ca3b
RG
171/* first we have the handler thread, t_id == 0 (some other
172 helper threads like SNMP might have t_id == 0 as well)
173 then the distributor threads if any
174 and finally the workers */
175static std::vector<RecThreadInfo> s_threadInfos;
176/* without reuseport, all listeners share the same sockets */
177static deferredAdd_t g_deferredAdds;
faf580f5 178
d187038c
RG
179typedef vector<int> tcpListenSockets_t;
180typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 181
d187038c 182static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 183static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
184static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
185static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
186static AtomicCounter counter;
9065eb05 187static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 188static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 189static NetmaskGroup g_XPFAcl;
d187038c 190static size_t g_tcpMaxQueriesPerConn;
a5886e6a 191static size_t s_maxUDPQueriesPerRound;
d187038c
RG
192static uint64_t g_latencyStatSize;
193static uint32_t g_disthashseed;
194static unsigned int g_maxTCPPerClient;
d187038c 195static unsigned int g_maxMThreads;
b243ca3b 196static unsigned int g_numDistributorThreads;
d187038c
RG
197static unsigned int g_numWorkerThreads;
198static int g_tcpTimeout;
199static uint16_t g_udpTruncationThreshold;
59cb4a79 200static uint16_t g_xpfRRCode{0};
d187038c
RG
201static std::atomic<bool> statsWanted;
202static std::atomic<bool> g_quiet;
203static bool g_logCommonErrors;
204static bool g_anyToTcp;
b243ca3b 205static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 206static bool g_reusePort{false};
00b8cadc 207static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 208static time_t g_statisticsInterval;
9065eb05 209static bool g_useIncomingECS;
a6f7f5fe 210std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
211#ifdef NOD_ENABLED
212static bool g_nodEnabled;
213static DNSName g_nodLookupDomain;
214static bool g_nodLog;
215static SuffixMatchNode g_nodDomainWL;
ca2526f5 216static std::string g_nod_pbtag;
41c542ec
NC
217static bool g_udrEnabled;
218static bool g_udrLog;
ca2526f5 219static std::string g_udr_pbtag;
af1377b7 220#endif /* NOD_ENABLED */
f097141b 221#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 222static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
223#else
224static std::set<uint16_t> s_avoidUdpSourcePorts;
225#endif
bf6f28ca
CHB
226static uint16_t s_minUdpSourcePort;
227static uint16_t s_maxUdpSourcePort;
49a699c4 228
b243ca3b 229RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 230RecursorStats g_stats;
2d733c0f 231string s_programname="pdns_recursor";
d187038c 232string s_pidfname;
c1c29961 233bool g_lowercaseOutgoing;
bf19ccfd 234unsigned int g_networkTimeoutMsec;
d187038c
RG
235unsigned int g_numThreads;
236uint16_t g_outgoingEDNSBufsize;
98d36505 237bool g_logRPZChanges{false};
c3828c03 238
12cd44ee 239#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 240#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 241// Bad Nets taken from both:
3ddb9247 242// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 243// and
244// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
245// where such a network may not be considered a valid destination
246#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
247#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 248
d7dae798 249//! used to send information to a newborn mthread
ea634573 250struct DNSComboWriter {
08b02366 251 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
252 {
253 }
5cc8371b 254
08b02366 255 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
256 {
257 }
258
5cc8371b
RG
259 void setRemote(const ComboAddress& sa)
260 {
261 d_remote=sa;
262 }
263
264 void setSource(const ComboAddress& sa)
ea634573 265 {
5cc8371b 266 d_source=sa;
ea634573
BH
267 }
268
b71b60ee 269 void setLocal(const ComboAddress& sa)
270 {
271 d_local=sa;
272 }
273
5cc8371b
RG
274 void setDestination(const ComboAddress& sa)
275 {
276 d_destination=sa;
277 }
b71b60ee 278
ea634573
BH
279 void setSocket(int sock)
280 {
281 d_socket=sock;
282 }
a1754c6a
BH
283
284 string getRemote() const
285 {
5cc8371b
RG
286 if (d_source == d_remote) {
287 return d_source.toStringWithPort();
288 }
289 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
290 }
291
5cc8371b 292 MOADNSParser d_mdp;
c9e9e5e0 293 struct timeval d_now;
5cc8371b
RG
294 /* Remote client, might differ from d_source
295 in case of XPF, in which case d_source holds
296 the IP of the client and d_remote of the proxy
297 */
298 ComboAddress d_remote;
299 ComboAddress d_source;
300 /* Destination address, might differ from
301 d_destination in case of XPF, in which case
302 d_destination holds the IP of the proxy and
303 d_local holds our own. */
304 ComboAddress d_local;
305 ComboAddress d_destination;
aa7929a3
RG
306#ifdef HAVE_PROTOBUF
307 boost::uuids::uuid d_uuid;
67e31ebe 308 string d_requestorId;
590388d2 309 string d_deviceId;
aa7929a3 310#endif
08b02366 311 std::string d_query;
5164bac3
RG
312 std::vector<std::string> d_policyTags;
313 LuaContext::LuaObject d_data;
b40562da 314 EDNSSubnetOpts d_ednssubnet;
5164bac3 315 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 316 int d_socket;
b673817a 317 unsigned int d_tag{0};
e9f63d47 318 uint32_t d_qhash{0};
70fb28d9 319 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
320 uint16_t d_ecsBegin{0};
321 uint16_t d_ecsEnd{0};
70fb28d9 322 bool d_variable{false};
5164bac3
RG
323 bool d_ecsFound{false};
324 bool d_ecsParsed{false};
325 bool d_tcp;
ea634573
BH
326};
327
06857845
RG
328MT_t* getMT()
329{
330 return MT ? MT.get() : nullptr;
331}
ea634573 332
288f4aa9
BH
333ArgvMap &arg()
334{
335 static ArgvMap theArg;
336 return theArg;
337}
4ef015cd 338
8fb594ba 339unsigned int getRecursorThreadId()
b4015453 340{
30da2030 341 return t_id;
b4015453 342}
09e6702a 343
30ee601a
RG
344int getMTaskerTID()
345{
346 return MT->getTid();
347}
348
b243ca3b
RG
349static bool isDistributorThread()
350{
351 if (t_id == 0) {
352 return false;
353 }
354
355 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
356}
357
358static bool isHandlerThread()
359{
360 if (t_id == 0) {
361 return true;
362 }
363
364 return s_threadInfos.at(t_id).isHandler;
365}
366
d187038c 367static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 368
50c81227 369// -1 is error, 0 is timeout, 1 is success
3ddb9247 370int asendtcp(const string& data, Socket* sock)
5c633640
BH
371{
372 PacketID pident;
373 pident.sock=sock;
374 pident.outMSG=data;
3ddb9247 375
bb4bdbaf 376 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 377 string packet;
5c633640 378
5b0ddd18 379 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 380
9170fbaf 381 if(!ret || ret==-1) { // timeout
bb4bdbaf 382 t_fdm->removeWriteFD(sock->getHandle());
5c633640 383 }
50c81227
BH
384 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
385 return -1;
386 }
9170fbaf 387 return ret;
5c633640
BH
388}
389
d187038c 390static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 391
9170fbaf 392// -1 is error, 0 is timeout, 1 is success
a683e8bd 393int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 394{
50c81227 395 data.clear();
5c633640
BH
396 PacketID pident;
397 pident.sock=sock;
398 pident.inNeeded=len;
825fa717 399 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 400 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 401
bb4bdbaf 402 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 403 if(!ret || ret==-1) { // timeout
bb4bdbaf 404 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 405 }
50c81227
BH
406 else if(data.empty()) {// error, EOF or other
407 return -1;
408 }
409
9170fbaf 410 return ret;
288f4aa9
BH
411}
412
d187038c 413static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 414{
fba1e944 415 PacketID pident=*any_cast<PacketID>(&var);
4465e941 416 char resp[512];
7c77ce63
RG
417 ComboAddress fromaddr;
418 socklen_t addrlen=sizeof(fromaddr);
419
420 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
421 if (fromaddr != pident.remote) {
e6a9dde5 422 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
423
424 }
425
4465e941 426 t_fdm->removeReadFD(fd);
427 if(ret >= 0) {
a683e8bd 428 string data(resp, (size_t) ret);
fba1e944 429 MT->sendEvent(pident, &data);
4465e941 430 }
431 else {
fba1e944 432 string empty;
433 MT->sendEvent(pident, &empty);
434 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 435 }
436}
fba1e944 437string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 438{
4465e941 439 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
440 s.setNonBlocking();
441 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
442
443 s.bind(local);
444 s.connect(dest);
4465e941 445 s.send(query);
446
447 PacketID pident;
448 pident.sock=&s;
7c77ce63 449 pident.remote=dest;
4465e941 450 pident.type=0;
fba1e944 451 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 452
453 string data;
fba1e944 454
4465e941 455 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 456
4465e941 457 if(!ret || ret==-1) { // timeout
4465e941 458 t_fdm->removeReadFD(s.getHandle());
459 }
460 else if(data.empty()) {// error, EOF or other
fba1e944 461 // we could special case this
4465e941 462 return data;
463 }
4465e941 464 return data;
465}
466
d7dae798 467//! pick a random query local address
1652a63e 468ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 469{
1652a63e 470 ComboAddress ret;
5a38281c 471 if(family==AF_INET) {
3ddb9247 472 if(g_localQueryAddresses4.empty())
1652a63e 473 ret = g_local4;
3ddb9247 474 else
1652a63e
BH
475 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
476 ret.sin4.sin_port = htons(port);
5a38281c
BH
477 }
478 else {
479 if(g_localQueryAddresses6.empty())
1652a63e
BH
480 ret = g_local6;
481 else
482 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 483
1652a63e 484 ret.sin6.sin6_port = htons(port);
5a38281c 485 }
1652a63e 486 return ret;
5a38281c 487}
4ef015cd 488
d187038c 489static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 490
d187038c 491static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
492{
493 uint32_t psize=0;
494 socklen_t len=sizeof(psize);
3ddb9247 495
d7dae798 496 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 497 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 498 return;
d7dae798
BH
499 }
500
501 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 502 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
503}
504
505
506static void setSocketReceiveBuffer(int fd, uint32_t size)
507{
508 setSocketBuffer(fd, SO_RCVBUF, size);
509}
510
511static void setSocketSendBuffer(int fd, uint32_t size)
512{
513 setSocketBuffer(fd, SO_SNDBUF, size);
514}
515
516
4ef015cd
BH
517// you can ask this class for a UDP socket to send a query from
518// this socket is not yours, don't even think about deleting it
519// but after you call 'returnSocket' on it, don't assume anything anymore
520class UDPClientSocks
521{
4ef015cd 522 unsigned int d_numsocks;
4ef015cd 523public:
e2642526 524 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
525 {
526 }
527
996c89cc 528 typedef set<int> socks_t;
4ef015cd
BH
529 socks_t d_socks;
530
2ee280cf 531 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 532 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 533 {
d8f6d49f
BH
534 *fd=makeClientSocket(toaddr.sin4.sin_family);
535 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 536 return -2;
d8f6d49f
BH
537
538 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
539 int err = errno;
41ff43f8 540 // returnSocket(*fd);
a7b68ae7
RG
541 try {
542 closesocket(*fd);
543 }
544 catch(const PDNSException& e) {
e6a9dde5 545 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
546 }
547
d8f6d49f 548 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 549 return -2;
998a4334 550 return -1;
d8f6d49f 551 }
998a4334 552
d8f6d49f 553 d_socks.insert(*fd);
998a4334 554 d_numsocks++;
d8f6d49f 555 return 0;
4ef015cd
BH
556 }
557
095c3045
BH
558 void returnSocket(int fd)
559 {
560 socks_t::iterator i=d_socks.find(fd);
34801ab1 561 if(i==d_socks.end()) {
335da0ba 562 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 563 }
bb4bdbaf 564 returnSocketLocked(i);
095c3045
BH
565 }
566
4ef015cd 567 // return a socket to the pool, or simply erase it
bb4bdbaf 568 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 569 {
600fc20b 570 if(i==d_socks.end()) {
3f81d239 571 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 572 }
80baf329 573 try {
bb4bdbaf 574 t_fdm->removeReadFD(*i);
80baf329
BH
575 }
576 catch(FDMultiplexerException& e) {
bb4bdbaf 577 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 578 }
a7b68ae7
RG
579 try {
580 closesocket(*i);
581 }
582 catch(const PDNSException& e) {
e6a9dde5 583 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 584 }
3ddb9247 585
998a4334
BH
586 d_socks.erase(i++);
587 --d_numsocks;
4ef015cd 588 }
d8f6d49f
BH
589
590 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 591 static int makeClientSocket(int family)
d8f6d49f 592 {
a683e8bd 593 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 594
d8f6d49f
BH
595 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
596 return ret;
3ddb9247
PD
597
598 if(ret<0)
335da0ba 599 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 600
7eb73ffa 601 // setCloseOnExec(ret); // we're not going to exec
5a38281c 602
d8f6d49f 603 int tries=10;
3aa91c3e 604 ComboAddress sin;
d8f6d49f 605 while(--tries) {
1652a63e 606 uint16_t port;
3ddb9247 607
d8f6d49f 608 if(tries==1) // fall back to kernel 'random'
4957a608 609 port = 0;
bf6f28ca
CHB
610 else {
611 do {
612 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
613 }
614 while (s_avoidUdpSourcePorts.count(port));
615 }
5a38281c 616
3aa91c3e 617 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 618
3ddb9247 619 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 620 break;
d8f6d49f
BH
621 }
622 if(!tries)
3aa91c3e 623 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 624
3897b9e1 625 setNonBlocking(ret);
d8f6d49f
BH
626 return ret;
627 }
49a699c4
BH
628};
629
f26bf547 630static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 631
288f4aa9 632/* these two functions are used by LWRes */
34801ab1 633// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 634int asendto(const char *data, size_t len, int flags,
3ddb9247 635 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 636{
34801ab1
BH
637
638 PacketID pident;
787e5eab
BH
639 pident.domain = domain;
640 pident.remote = toaddr;
641 pident.type = qtype;
34801ab1
BH
642
643 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
644 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
645
646 for(; chain.first != chain.second; chain.first++) {
647 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 648 /*
4665c31e
BH
649 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
650 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 651 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 652 */
34801ab1
BH
653 chain.first->key.chain.insert(id); // we can chain
654 *fd=-1; // gets used in waitEvent / sendEvent later on
655 return 1;
656 }
657 }
658
49a699c4 659 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
660 if(ret < 0)
661 return ret;
34801ab1 662
998a4334
BH
663 pident.fd=*fd;
664 pident.id=id;
3ddb9247 665
bb4bdbaf
BH
666 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
667 ret = send(*fd, data, len, 0);
668
5b0ddd18 669 int tmp = errno;
bb4bdbaf 670
7302ed0a 671 if(ret < 0)
49a699c4 672 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 673
5b0ddd18 674 errno = tmp; // this is for logging purposes only
7302ed0a 675 return ret;
288f4aa9
BH
676}
677
9170fbaf 678// -1 is error, 0 is timeout, 1 is success
f128d20d 679int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 680 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 681{
0d5f0a9f 682 static optional<unsigned int> nearMissLimit;
3ddb9247 683 if(!nearMissLimit)
0d5f0a9f
BH
684 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
685
288f4aa9 686 PacketID pident;
4ef015cd 687 pident.fd=fd;
288f4aa9 688 pident.id=id;
0d5f0a9f 689 pident.domain=domain;
787e5eab 690 pident.type = qtype;
996c89cc 691 pident.remote=fromaddr;
b636533b 692
5b0ddd18 693 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 694
9170fbaf 695 if(ret > 0) {
996c89cc 696 if(packet.empty()) // means "error"
3ddb9247 697 return -1;
998a4334 698
a683e8bd 699 *d_len=packet.size();
f128d20d 700
0d5f0a9f 701 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 702 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 703 g_stats.spoofCount++;
35ce8576
BH
704 return -1;
705 }
288f4aa9 706 }
09e6702a 707 else {
34801ab1 708 if(fd >= 0)
49a699c4 709 t_udpclientsocks->returnSocket(fd);
09e6702a 710 }
9170fbaf 711 return ret;
288f4aa9
BH
712}
713
88def049
BH
714static void writePid(void)
715{
191f2e47 716 if(!::arg().mustDo("write-pid"))
717 return;
18e7758c 718 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 719 if(of)
705f31ae 720 of<< Utility::getpid() <<endl;
88def049 721 else
e6a9dde5 722 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
723}
724
2749c3fe 725TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
726{
727 ++s_currentConnections;
cd989c87 728 (*t_tcpClientCounts)[d_remote]++;
0e408828 729}
cd989c87
BH
730
731TCPConnection::~TCPConnection()
0e408828 732{
a7b68ae7
RG
733 try {
734 if(closesocket(d_fd) < 0)
e6a9dde5 735 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
736 }
737 catch(const PDNSException& e) {
e6a9dde5 738 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
739 }
740
3ddb9247 741 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 742 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 743 --s_currentConnections;
0e408828 744}
0e9d9ce2 745
3ddb9247 746AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
747
748static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 749
92011b8f 750// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 751static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 752{
92011b8f 753 if(packetsize > 1000 && t_largeanswerremotes)
754 t_largeanswerremotes->push_back(remote);
2cc13433
BH
755 switch(res) {
756 case RCode::ServFail:
92011b8f 757 if(t_servfailremotes) {
758 t_servfailremotes->push_back(remote);
5af86fdc 759 if(query && t_servfailqueryring) // packet cache
92011b8f 760 t_servfailqueryring->push_back(make_pair(*query, qtype));
761 }
2cc13433
BH
762 g_stats.servFails++;
763 break;
764 case RCode::NXDomain:
765 g_stats.nxDomains++;
766 break;
767 case RCode::NoError:
768 g_stats.noErrors++;
769 break;
770 }
771}
772
9a864da4 773static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 774try
775{
5cc8371b 776 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 777}
778catch(...)
779{
780 return "Exception making error message for exception";
781}
782
aa7929a3 783#ifdef HAVE_PROTOBUF
b773359c 784static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 785{
b773359c
RG
786 if (!t_protobufServers) {
787 return;
788 }
789
e1c8a4bb
RG
790 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
791 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
792 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 793 message.setServerIdentity(SyncRes::s_serverID);
a94bc5d7 794 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 795 message.setRequestorId(requestorId);
590388d2 796 message.setDeviceId(deviceId);
02b47f43 797
02b47f43 798 if (!policyTags.empty()) {
d9d3f9c1 799 message.setPolicyTags(policyTags);
02b47f43 800 }
aa7929a3 801
d9d3f9c1 802// cerr <<message.toDebugString()<<endl;
aa7929a3 803 std::string str;
d9d3f9c1 804 message.serialize(str);
b773359c
RG
805
806 for (auto& server : *t_protobufServers) {
807 server->queueData(str);
808 }
aa7929a3
RG
809}
810
b773359c 811static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 812{
b773359c
RG
813 if (!t_protobufServers) {
814 return;
815 }
816
d9d3f9c1 817// cerr <<message.toDebugString()<<endl;
aa7929a3 818 std::string str;
d9d3f9c1 819 message.serialize(str);
b773359c
RG
820
821 for (auto& server : *t_protobufServers) {
822 server->queueData(str);
823 }
aa7929a3
RG
824}
825#endif
826
53508135
PL
827/**
828 * Chases the CNAME provided by the PolicyCustom RPZ policy.
829 *
830 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
831 * @param qtype: The QType of the original query
832 * @param sr: A SyncRes
833 * @param res: An integer that will contain the RCODE of the lookup we do
834 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
835 */
d187038c 836static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
837{
838 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
839 bool oldWantsRPZ = sr.getWantsRPZ();
840 sr.setWantsRPZ(false);
53508135
PL
841 vector<DNSRecord> ans;
842 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
843 for (const auto& rec : ans) {
844 if(rec.d_place == DNSResourceRecord::ANSWER) {
845 ret.push_back(rec);
846 }
847 }
848 // Reset the RPZ state of the SyncRes
30ee601a 849 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
850 }
851}
852
70fb28d9 853static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 854{
70fb28d9 855 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
856
857 if(rec.d_type != QType::OPT) // their TTL ain't real
858 minTTL = min(minTTL, rec.d_ttl);
859
860 rec.d_content->toPacket(pw);
861 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
862 pw.rollback();
863 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
864 pw.getHeader()->tc=1;
865 pw.truncate();
866 }
867 return false;
868 }
869
870 return true;
871}
872
63341e8d 873#ifdef HAVE_PROTOBUF
3fe06137 874static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 875{
3fe06137 876 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
877
878 for (const auto& server : config.servers) {
879 try {
3fe06137 880 result->emplace_back(new RemoteLogger(server, config.timeout, config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect));
b773359c
RG
881 }
882 catch(const std::exception& e) {
883 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
884 }
885 catch(const PDNSException& e) {
886 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
887 }
63341e8d
RG
888 }
889
890 return result;
891}
892
893static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
894{
895 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
896 if (t_protobufServers) {
897 for (auto& server : *t_protobufServers) {
898 server->stop();
899 }
900 t_protobufServers.reset();
63341e8d
RG
901 }
902
903 return false;
904 }
905
906 /* if the server was not running, or if it was running according to a
907 previous configuration */
b773359c
RG
908 if (!t_protobufServers ||
909 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 910
b773359c
RG
911 if (t_protobufServers) {
912 for (auto& server : *t_protobufServers) {
913 server->stop();
914 }
63341e8d 915 }
b773359c 916 t_protobufServers.reset();
63341e8d 917
b773359c
RG
918 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
919 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
920 }
921
922 return true;
923}
924
925static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
926{
927 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
928 if (t_outgoingProtobufServers) {
929 for (auto& server : *t_outgoingProtobufServers) {
930 server->stop();
931 }
63341e8d 932 }
b773359c 933 t_outgoingProtobufServers.reset();
63341e8d
RG
934
935 return false;
936 }
937
938 /* if the server was not running, or if it was running according to a
939 previous configuration */
b773359c
RG
940 if (!t_outgoingProtobufServers ||
941 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 942
b773359c
RG
943 if (t_outgoingProtobufServers) {
944 for (auto& server : *t_outgoingProtobufServers) {
945 server->stop();
946 }
63341e8d 947 }
b773359c 948 t_outgoingProtobufServers.reset();
63341e8d 949
b773359c
RG
950 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
951 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
952 }
953
954 return true;
955}
956#endif /* HAVE_PROTOBUF */
957
af1377b7 958#ifdef NOD_ENABLED
41c542ec 959static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
960{
961 static const QType qt(QType::A);
962 static const uint16_t qc(QClass::IN);
41c542ec 963 bool ret = false;
af1377b7
NC
964 // First check the (sub)domain isn't whitelisted for NOD purposes
965 if (!g_nodDomainWL.check(dname)) {
966 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
967 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
968 if (g_nodLog) {
969 // This should probably log to a dedicated log file
970 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
971 }
972 if (!(g_nodLookupDomain.isRoot())) {
973 // Send a DNS A query to <domain>.g_nodLookupDomain
974 DNSName qname = dname;
975 vector<DNSRecord> dummy;
976 qname += g_nodLookupDomain;
977 directResolve(qname, qt, qc, dummy);
978 }
41c542ec 979 ret = true;
af1377b7
NC
980 }
981 }
41c542ec 982 return ret;
af1377b7
NC
983}
984
985static void nodAddDomain(const DNSName& dname)
986{
987 // Don't bother adding domains on the nod whitelist
988 if (!g_nodDomainWL.check(dname)) {
989 if (t_nodDBp) {
990 // This keeps the nod info up to date
991 t_nodDBp->addDomain(dname);
992 }
993 }
994}
41c542ec
NC
995
996static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
997{
998 bool ret = false;
999 if (record.d_place == DNSResourceRecord::ANSWER ||
1000 record.d_place == DNSResourceRecord::ADDITIONAL) {
1001 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1002 std::stringstream ss;
1003 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1004 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1005 if (g_udrLog) {
1006 // This should also probably log to a dedicated file.
1007 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1008 }
1009 ret = true;
1010 }
1011 }
1012 return ret;
1013}
af1377b7
NC
1014#endif /* NOD_ENABLED */
1015
d187038c 1016static void startDoResolve(void *p)
288f4aa9 1017{
9a864da4 1018 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1019 try {
5af86fdc
RG
1020 if (t_queryring)
1021 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1022
32015748 1023 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1024 EDNSOpts edo;
5164bac3 1025 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1026 bool variableAnswer = dc->d_variable;
8e079f3a 1027 bool haveEDNS=false;
ca2526f5
NC
1028#ifdef NOD_ENABLED
1029 bool hasUDR = false;
1030#endif /* NOD_ENABLED */
f1db0de2
PL
1031 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1032 uint8_t ednsExtRCode = 0;
8e079f3a 1033 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1034 haveEDNS=true;
1035 if (edo.d_version != 0) {
1036 ednsExtRCode = ERCode::BADVERS;
1037 }
1038
32015748
RG
1039 if(!dc->d_tcp) {
1040 /* rfc6891 6.2.3:
1041 "Values lower than 512 MUST be treated as equal to 512."
1042 */
1043 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1044 }
5164bac3 1045 ednsOpts = edo.d_options;
8e079f3a 1046 haveEDNS=true;
3af35968 1047 maxanswersize -= 11; // EDNS header size
b40562da 1048
1f691b94
PL
1049 for (const auto& o : edo.d_options) {
1050 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1051 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1052 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1053 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1054 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1055 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1056 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1057 variableAnswer = true; // Can't packetcache an answer with NSID
1058 // Option Code and Option Length are both 2
1059 maxanswersize -= 2 + 2 + mode_server_id.size();
1060 }
b40562da
RG
1061 }
1062 }
10321a98 1063 }
b40562da
RG
1064 /* perhaps there was no EDNS or no ECS but by now we looked */
1065 dc->d_ecsParsed = true;
e325f20c 1066 vector<DNSRecord> ret;
ea634573 1067 vector<uint8_t> packet;
b23b8614 1068
ad42489c 1069 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1070 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1071 bool wantsRPZ(true);
1fbc6dc5 1072 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1073 bool logResponse = false;
aa7929a3 1074#ifdef HAVE_PROTOBUF
63341e8d 1075 if (checkProtobufExport(luaconfsLocal)) {
b773359c 1076 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1077 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1078 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
0bd2e252 1079 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1080 pbMessage->setServerIdentity(SyncRes::s_serverID);
d362f7c1 1081 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1082 }
1083#endif /* HAVE_PROTOBUF */
ad42489c 1084
3ddb9247 1085 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1086
1087 pw.getHeader()->aa=0;
1088 pw.getHeader()->ra=1;
c154c8a4 1089 pw.getHeader()->qr=1;
bb4bdbaf 1090 pw.getHeader()->tc=0;
ea634573 1091 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1092 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1093 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1094
70fb28d9
RG
1095 /* This is the lowest TTL seen in the records of the response,
1096 so we can't cache it for longer than this value.
1097 If we have a TTL cap, this value can't be larger than the
1098 cap no matter what. */
1099 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1100
1101 SyncRes sr(dc->d_now);
0c43f455 1102
2e921ec6 1103 bool DNSSECOK=false;
3457a2a0 1104 if(t_pdl) {
f26bf547 1105 sr.setLuaEngine(t_pdl);
3457a2a0 1106 }
9eec8c98 1107 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1108 sr.setDoDNSSEC(true);
9eec8c98
PL
1109
1110 // Does the requestor want DNSSEC records?
d6c335ab 1111 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1112 DNSSECOK=true;
1113 g_stats.dnssecQueries++;
1114 }
88c33dca
RG
1115 if (dc->d_mdp.d_header.cd) {
1116 /* Per rfc6840 section 5.9, "When processing a request with
1117 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1118 to return all response data, even data that has failed DNSSEC
1119 validation. */
1120 ++g_stats.dnssecCheckDisabledQueries;
1121 }
1122 if (dc->d_mdp.d_header.ad) {
1123 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1124 indicating that the requester understands and is interested in the
1125 value of the AD bit in the response. This allows a requester to
1126 indicate that it understands the AD bit without also requesting
1127 DNSSEC data via the DO bit. */
1128 ++g_stats.dnssecAuthenticDataQueries;
1129 }
9eec8c98
PL
1130 } else {
1131 // Ignore the client-set CD flag
1132 pw.getHeader()->cd=0;
5b9853c9 1133 }
0c43f455
RG
1134 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1135
4898a348 1136#ifdef HAVE_PROTOBUF
30ee601a 1137 sr.setInitialRequestId(dc->d_uuid);
b773359c 1138 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1139#endif
0c43f455 1140
2fe3354d 1141 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1142
904d3219 1143 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1144 bool shouldNotValidate = false;
904d3219 1145
ef3b6cd7
RG
1146 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1147 int res = RCode::NoError;
1f1ca368 1148 DNSFilterEngine::Policy appliedPolicy;
39ec5d29 1149 DNSRecord spoofed;
f1c7929a 1150 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1151 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1152 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1153 dq.tag = dc->d_tag;
1154 dq.discardedPolicies = &sr.d_discardedPolicies;
1155 dq.policyTags = &dc->d_policyTags;
1156 dq.appliedPolicy = &appliedPolicy;
1157 dq.currentRecords = &ret;
1158 dq.dh = &dc->d_mdp.d_header;
05c74122 1159 dq.data = dc->d_data;
67e31ebe
RG
1160#ifdef HAVE_PROTOBUF
1161 dq.requestorId = dc->d_requestorId;
590388d2 1162 dq.deviceId = dc->d_deviceId;
67e31ebe 1163#endif
ba21fcfe 1164
6cf96227
PL
1165 if(ednsExtRCode != 0) {
1166 goto sendit;
1167 }
1168
e661a20b 1169 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1170 pw.getHeader()->tc = 1;
1171 res = 0;
1172 variableAnswer = true;
e661a20b
PD
1173 goto sendit;
1174 }
1175
f26bf547 1176 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1177 sr.setLogMode(SyncRes::Store);
1178 tracedQuery=true;
1179 }
3ddb9247 1180
8f7473d7 1181
976ec823 1182 if(!g_quiet || tracedQuery) {
e6a9dde5 1183 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1184 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1185 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1186 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1187 }
e6a9dde5 1188 g_log<<endl;
976ec823 1189 }
c75a6a9e 1190
fededf47 1191 sr.setId(MT->getTid());
67828389 1192 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1193 sr.setCacheOnly();
1194
f26bf547
RG
1195 if (t_pdl) {
1196 t_pdl->prerpz(dq, res);
0a273054
RG
1197 }
1198
db486de5 1199 // Check if the query has a policy attached to it
0a273054 1200 if (wantsRPZ) {
5cc8371b 1201 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1202 }
644dd1da 1203
54be222b 1204 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1205 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1206
30ee601a 1207 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1208 if(wantsRPZ) {
1209 switch(appliedPolicy.d_kind) {
1210 case DNSFilterEngine::PolicyKind::NoAction:
1211 break;
1212 case DNSFilterEngine::PolicyKind::Drop:
1213 g_stats.policyDrops++;
7a25883a 1214 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1215 return;
1216 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1217 g_stats.policyResults[appliedPolicy.d_kind]++;
1218 res=RCode::NXDomain;
1219 goto haveAnswer;
1220 case DNSFilterEngine::PolicyKind::NODATA:
1221 g_stats.policyResults[appliedPolicy.d_kind]++;
1222 res=RCode::NoError;
db486de5 1223 goto haveAnswer;
b8470add
PL
1224 case DNSFilterEngine::PolicyKind::Custom:
1225 g_stats.policyResults[appliedPolicy.d_kind]++;
1226 res=RCode::NoError;
a9e029ee 1227 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1228 ret.push_back(spoofed);
53508135 1229 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1230 goto haveAnswer;
1231 case DNSFilterEngine::PolicyKind::Truncate:
1232 if(!dc->d_tcp) {
1233 g_stats.policyResults[appliedPolicy.d_kind]++;
1234 res=RCode::NoError;
1235 pw.getHeader()->tc=1;
1236 goto haveAnswer;
1237 }
1238 break;
1239 }
db486de5
PL
1240 }
1241
b8470add 1242 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1243 try {
1244 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1245 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1246 }
1247 catch(ImmediateServFailException &e) {
854d44e3 1248 if(g_logCommonErrors)
e6a9dde5 1249 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1250 res = RCode::ServFail;
1251 }
4485aa35 1252
1921a4c2
RG
1253 dq.validationState = sr.getValidationState();
1254
b8470add
PL
1255 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1256 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1257 appliedPolicy = sr.d_appliedPolicy;
1258 g_stats.policyResults[appliedPolicy.d_kind]++;
1259 switch(appliedPolicy.d_kind) {
1260 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1261 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1262 case DNSFilterEngine::PolicyKind::Drop:
1263 g_stats.policyDrops++;
b8470add
PL
1264 return;
1265 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1266 ret.clear();
1267 res=RCode::NXDomain;
1268 goto haveAnswer;
1269
1270 case DNSFilterEngine::PolicyKind::NODATA:
1271 ret.clear();
1272 res=RCode::NoError;
1273 goto haveAnswer;
1274
1275 case DNSFilterEngine::PolicyKind::Truncate:
1276 if(!dc->d_tcp) {
1277 ret.clear();
1278 res=RCode::NoError;
1279 pw.getHeader()->tc=1;
1280 goto haveAnswer;
1281 }
1282 break;
1283
1284 case DNSFilterEngine::PolicyKind::Custom:
1285 ret.clear();
1286 res=RCode::NoError;
a9e029ee 1287 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1288 ret.push_back(spoofed);
53508135 1289 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1290 goto haveAnswer;
1291 }
1292 }
1293
1294 if (wantsRPZ) {
1f1ca368 1295 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1296 }
db486de5 1297
f26bf547 1298 if(t_pdl) {
db486de5
PL
1299 if(res == RCode::NoError) {
1300 auto i=ret.cbegin();
1301 for(; i!= ret.cend(); ++i)
1302 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1303 break;
f26bf547 1304 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1305 shouldNotValidate = true;
1306
db486de5 1307 }
f26bf547 1308 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1309 shouldNotValidate = true;
db486de5 1310
f26bf547 1311 if(t_pdl->postresolve(dq, res))
3ca4e735 1312 shouldNotValidate = true;
db486de5
PL
1313 }
1314
b8470add
PL
1315 if (wantsRPZ) { //XXX This block is repeated, see above
1316 g_stats.policyResults[appliedPolicy.d_kind]++;
1317 switch(appliedPolicy.d_kind) {
1318 case DNSFilterEngine::PolicyKind::NoAction:
1319 break;
1320 case DNSFilterEngine::PolicyKind::Drop:
1321 g_stats.policyDrops++;
b8470add
PL
1322 return;
1323 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1324 ret.clear();
1325 res=RCode::NXDomain;
1326 goto haveAnswer;
1327
1328 case DNSFilterEngine::PolicyKind::NODATA:
1329 ret.clear();
1330 res=RCode::NoError;
1331 goto haveAnswer;
1332
1333 case DNSFilterEngine::PolicyKind::Truncate:
1334 if(!dc->d_tcp) {
1335 ret.clear();
1336 res=RCode::NoError;
1337 pw.getHeader()->tc=1;
1338 goto haveAnswer;
1339 }
1340 break;
1341
1342 case DNSFilterEngine::PolicyKind::Custom:
1343 ret.clear();
1344 res=RCode::NoError;
a9e029ee 1345 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1346 ret.push_back(spoofed);
53508135 1347 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1348 goto haveAnswer;
1349 }
644dd1da 1350 }
4485aa35 1351 }
644dd1da 1352 haveAnswer:;
3e8216c8 1353 if(res == PolicyDecision::DROP) {
e9c2ad3a 1354 g_stats.policyDrops++;
ae7e77ad 1355 return;
3ddb9247 1356 }
9cdfab64 1357 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1358 {
85ffbc53
PD
1359 string trace(sr.getTrace());
1360 if(!trace.empty()) {
1361 vector<string> lines;
1362 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1363 for(const string& line : lines) {
85ffbc53 1364 if(!line.empty())
e6a9dde5 1365 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1366 }
1367 }
1368 }
3ddb9247 1369
9cdfab64 1370 if(res == -1) {
0fe1d080
PD
1371 pw.getHeader()->rcode=RCode::ServFail;
1372 // no commit here, because no record
1373 g_stats.servFails++;
1374 }
288f4aa9 1375 else {
ea634573 1376 pw.getHeader()->rcode=res;
92011b8f 1377
f3fe4ae6 1378 // Does the validation mode or query demand validation?
0c43f455 1379 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1380 try {
f3fe4ae6 1381 if(sr.doLog()) {
e6a9dde5 1382 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1383 }
4d2be65d
RG
1384
1385 auto state = sr.getValidationState();
1386
b25cae9a 1387 if(state == Secure) {
2e921ec6 1388 if(sr.doLog()) {
e6a9dde5 1389 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1390 }
b25cae9a 1391
1392 // Is the query source interested in the value of the ad-bit?
885c8881 1393 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1394 pw.getHeader()->ad=1;
1395 }
1396 else if(state == Insecure) {
f3fe4ae6 1397 if(sr.doLog()) {
e6a9dde5 1398 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1399 }
b25cae9a 1400
1401 pw.getHeader()->ad=0;
f3fe4ae6 1402 }
b25cae9a 1403 else if(state == Bogus) {
66f2e6ad
KM
1404 if(t_bogusremotes)
1405 t_bogusremotes->push_back(dc->d_source);
1406 if(t_bogusqueryring)
1407 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1408 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1409 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1410 }
1411
1412 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1413 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1414 if(sr.doLog()) {
e6a9dde5 1415 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1416 }
1417
1418 pw.getHeader()->rcode=RCode::ServFail;
1419 goto sendit;
1420 } else {
1421 if(sr.doLog()) {
e6a9dde5 1422 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1423 }
1424 }
1425 }
1426 }
1427 catch(ImmediateServFailException &e) {
1428 if(g_logCommonErrors)
e6a9dde5 1429 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1430 pw.getHeader()->rcode=RCode::ServFail;
1431 goto sendit;
f3fe4ae6 1432 }
b3f0ed10 1433 }
1434
c154c8a4 1435 if(ret.size()) {
92476c8b 1436 orderAndShuffle(ret);
5cc8371b 1437 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1438 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1439 variableAnswer=true;
1440 }
8e079f3a 1441 }
0afa32d4
RG
1442
1443 bool needCommit = false;
8e079f3a 1444 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1445 if( ! DNSSECOK &&
1446 ( i->d_type == QType::NSEC3 ||
1447 (
1448 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1449 (
1450 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1451 i->d_place != DNSResourceRecord::ANSWER
1452 )
1453 )
1454 )
1455 ) {
2e921ec6 1456 continue;
3e80ebce
KM
1457 }
1458
70fb28d9 1459 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1460 needCommit = false;
1461 break;
1462 }
1463 needCommit = true;
1464
41c542ec
NC
1465#ifdef NOD_ENABLED
1466 bool udr = false;
1467 if (g_udrEnabled) {
1468 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1469 if (!hasUDR && udr)
1470 hasUDR = true;
41c542ec
NC
1471 }
1472#endif /* NOD ENABLED */
1473
aa7929a3 1474#ifdef HAVE_PROTOBUF
b773359c 1475 if (t_protobufServers) {
41c542ec
NC
1476#ifdef NOD_ENABLED
1477 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1478#else
0bd2e252 1479 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1480#endif /* NOD_ENABLED */
aa7929a3
RG
1481 }
1482#endif
ea634573 1483 }
0afa32d4 1484 if(needCommit)
8e079f3a 1485 pw.commit();
288f4aa9 1486 }
10321a98 1487 sendit:;
b3f0ed10 1488
5a7f99b4 1489 if(g_useIncomingECS && haveEDNS && !sr.wasVariable()) {
1490 EDNSSubnetOpts eo;
1491 eo.source = dc->d_ednssubnet.source;
1492 ComboAddress sa;
1493 memset(&sa, 0, sizeof(sa));
1494 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1495 eo.scope = Netmask(sa, 0);
1496
1497 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1498 }
1499
97c6d7e5
RG
1500 if (haveEDNS) {
1501 /* we try to add the EDNS OPT RR even for truncated answers,
1502 as rfc6891 states:
1503 "The minimal response MUST be the DNS header, question section, and an
1504 OPT record. This MUST also occur when a truncated response (using
1505 the DNS header's TC bit) is returned."
1506 */
9b60fb71 1507 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1508 pw.commit();
97c6d7e5
RG
1509 }
1510
79332bff 1511 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1512 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1513#ifdef NOD_ENABLED
1514 bool nod = false;
1515 if (g_nodEnabled) {
1516 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1517 nod = true;
1518 }
1519#endif /* NOD_ENABLED */
aa7929a3 1520#ifdef HAVE_PROTOBUF
b773359c 1521 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1522 pbMessage->setBytes(packet.size());
1523 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1524 if (appliedPolicy.d_name) {
d362f7c1
RG
1525 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1526 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1527 }
d362f7c1
RG
1528 pbMessage->setPolicyTags(dc->d_policyTags);
1529 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1530 pbMessage->setRequestorId(dq.requestorId);
1531 pbMessage->setDeviceId(dq.deviceId);
41c542ec
NC
1532#ifdef NOD_ENABLED
1533 if (g_nodEnabled) {
ca2526f5 1534 if (nod) {
41c542ec 1535 pbMessage->setNOD(true);
ca2526f5
NC
1536 pbMessage->addPolicyTag(g_nod_pbtag);
1537 }
1538 if (hasUDR) {
1539 pbMessage->addPolicyTag(g_udr_pbtag);
1540 }
41c542ec
NC
1541 }
1542#endif /* NOD_ENABLED */
b773359c 1543 protobufLogResponse(*pbMessage);
ac238ea7 1544#ifdef NOD_ENABLED
ca2526f5
NC
1545 if (g_nodEnabled) {
1546 pbMessage->setNOD(false);
1547 pbMessage->clearUDR();
1548 if (nod)
1549 pbMessage->removePolicyTag(g_nod_pbtag);
1550 if (hasUDR)
1551 pbMessage->removePolicyTag(g_udr_pbtag);
1552 }
ac238ea7 1553#endif /* NOD_ENABLED */
aa7929a3
RG
1554 }
1555#endif
ea634573 1556 if(!dc->d_tcp) {
b71b60ee 1557 struct msghdr msgh;
1558 struct iovec iov;
1559 char cbuf[256];
1560 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1561 msgh.msg_control=NULL;
1562
cbc03320 1563 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1564 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1565 }
cbc03320 1566 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1567 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1568
3762e821 1569 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1570 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1571 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1572 g_now.tv_sec,
76e2b9e3 1573 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1574 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1575 dq.validationState,
08b02366
RG
1576 dc->d_ecsBegin,
1577 dc->d_ecsEnd,
4b0bdd5f 1578 std::move(pbMessage));
1051f8a9 1579 }
3762e821 1580 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1581 }
9c495589
BH
1582 else {
1583 char buf[2];
ea634573
BH
1584 buf[0]=packet.size()/256;
1585 buf[1]=packet.size()%256;
feccc9fc 1586
c038218b 1587 Utility::iovec iov[2];
feccc9fc 1588
ea634573
BH
1589 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1590 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1591
dd079764 1592 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1593 bool hadError=true;
feccc9fc 1594
dd079764 1595 if(wret == 0)
e6a9dde5 1596 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1597 else if(wret < 0 )
e6a9dde5 1598 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1599 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1600 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1601 else
18af64a8 1602 hadError=false;
3ddb9247 1603
09e6702a 1604 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1605
09e6702a 1606 if(hadError) {
18af64a8 1607 // no need to remove us from FDM, we weren't there
c36bc97a 1608 dc->d_socket = -1;
09e6702a 1609 }
a6ae6414 1610 else {
fde296a3
RG
1611 dc->d_tcpConnection->queriesCount++;
1612 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1613 dc->d_socket = -1;
1614 }
1615 else {
1616 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1617 Utility::gettimeofday(&g_now, 0); // needs to be updated
1618 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1619 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1620 }
0e9d9ce2 1621 }
9c495589 1622 }
2c9119cd 1623 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1624 if(!g_quiet) {
e6a9dde5
PL
1625 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1626 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1627 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1628 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1629
1630 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1631 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1632 }
1633
e6a9dde5 1634 g_log<<endl;
2c9119cd 1635
c75a6a9e 1636 }
b23b8614 1637
f7b8cffa
RG
1638 if (sr.d_outqueries || sr.d_authzonequeries) {
1639 t_RC->cacheMisses++;
1640 }
1641 else {
1642 t_RC->cacheHits++;
1643 }
2c9119cd 1644
fe213470
BH
1645 if(spent < 0.001)
1646 g_stats.answers0_1++;
1647 else if(spent < 0.010)
1648 g_stats.answers1_10++;
1649 else if(spent < 0.1)
1650 g_stats.answers10_100++;
1651 else if(spent < 1.0)
1652 g_stats.answers100_1000++;
1653 else
1654 g_stats.answersSlow++;
1655
574af7ea 1656 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1657 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1658 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1659 // no worries, we do this for packet cache hits elsewhere
19178da9 1660
1661 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1662 if(ourtime < 1)
1663 g_stats.ourtime0_1++;
1664 else if(ourtime < 2)
1665 g_stats.ourtime1_2++;
1666 else if(ourtime < 4)
1667 g_stats.ourtime2_4++;
1668 else if(ourtime < 8)
1669 g_stats.ourtime4_8++;
1670 else if(ourtime < 16)
1671 g_stats.ourtime8_16++;
1672 else if(ourtime < 32)
1673 g_stats.ourtime16_32++;
1674 else {
1675 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1676 g_stats.ourtimeSlow++;
1677 }
042da1a1 1678 if(ourtime >= 0.0) {
1679 newLat=ourtime*1000; // usec
1680 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1681 }
c6d04bdc 1682 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1683 }
3f81d239 1684 catch(PDNSException &ae) {
e6a9dde5 1685 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1686 }
16ce7f18
JS
1687 catch(const MOADNSException &mde) {
1688 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1689 }
fdbf35ac 1690 catch(std::exception& e) {
e6a9dde5 1691 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1692
1693 // Luawrapper nests the exception from Lua, so we unnest it here
1694 try {
1695 std::rethrow_if_nested(e);
2010ac95 1696 } catch(const std::exception& ne) {
e6a9dde5 1697 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1698 } catch(...) {}
1699
e6a9dde5 1700 g_log<<endl;
c154c8a4 1701 }
288f4aa9 1702 catch(...) {
e6a9dde5 1703 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1704 }
3ddb9247 1705
ec6eacbc 1706 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1707}
1708
d187038c 1709static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1710{
2d733c0f 1711 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1712 if(processNum >= 0)
335da0ba 1713 sockname += "."+std::to_string(processNum);
677e2a46 1714 sockname+=".controlsocket";
41f7a068 1715 s_rcc.listen(sockname);
3ddb9247 1716
387de317
BH
1717 int sockowner = -1;
1718 int sockgroup = -1;
1719
1720 if (!::arg().isEmpty("socket-group"))
1721 sockgroup=::arg().asGid("socket-group");
1722 if (!::arg().isEmpty("socket-owner"))
1723 sockowner=::arg().asUid("socket-owner");
3ddb9247 1724
f838ad8d
BH
1725 if (sockgroup > -1 || sockowner > -1) {
1726 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1727 unixDie("Failed to chown control socket");
1728 }
1729 }
387de317
BH
1730
1731 // do mode change if socket-mode is given
1732 if(!::arg().isEmpty("socket-mode")) {
1733 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1734 if(chmod(sockname.c_str(), sockmode) < 0) {
1735 unixDie("Failed to chmod control socket");
1736 }
387de317 1737 }
1d5b3ce6
BH
1738}
1739
5cc8371b 1740static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1741 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1742 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1743{
59cb4a79 1744 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1745 const bool lookForECS = ednssubnet != nullptr;
1746 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1747 size_t questionLen = question.length();
1748 unsigned int consumed=0;
1749 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1750
1751 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1752 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1753 const uint16_t arcount = ntohs(dh->arcount);
1754
1755 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1756 if (question.at(pos) != 0) {
1757 /* not an OPT or a XPF, bye. */
1758 return;
1759 }
1760
1761 pos += 1;
1762 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1763 pos += sizeof(dnsrecordheader);
1764
1765 if (pos >= questionLen) {
1766 return;
1767 }
1768
02b47f43 1769 /* OPT root label (1) followed by type (2) */
5cc8371b 1770 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1771 if (!options) {
1772 char* ecsStart = nullptr;
1773 size_t ecsLen = 0;
5cc8371b
RG
1774 /* we need to pass the record len */
1775 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1776 if (res == 0 && ecsLen > 4) {
1777 EDNSSubnetOpts eso;
1778 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1779 *ednssubnet=eso;
5cc8371b 1780 foundECS = true;
00b8cadc
RG
1781 }
1782 }
1783 }
1784 else {
5cc8371b
RG
1785 /* we need to pass the record len */
1786 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1787 if (res == 0) {
1788 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1789 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1790 EDNSSubnetOpts eso;
29e6303a 1791 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1792 *ednssubnet=eso;
5cc8371b 1793 foundECS = true;
00b8cadc
RG
1794 }
1795 }
02b47f43
RG
1796 }
1797 }
1798 }
59cb4a79 1799 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1800 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1801 return;
1802 }
1803
1804 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1805 }
1806
1807 pos += ntohs(drh->d_clen);
02b47f43
RG
1808 }
1809}
1810
d187038c 1811static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1812{
cd989c87 1813 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1814
879b3f70 1815 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 1816 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1817 if(bytes==1)
667f7e60 1818 conn->state=TCPConnection::BYTE1;
3ddb9247 1819 if(bytes==2) {
a0aa4f64 1820 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1821 conn->data.resize(conn->qlen);
667f7e60
BH
1822 conn->bytesread=0;
1823 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1824 }
1825 if(!bytes || bytes < 0) {
bb4bdbaf 1826 t_fdm->removeReadFD(fd);
09e6702a
BH
1827 return;
1828 }
1829 }
667f7e60 1830 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 1831 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1832 if(bytes==1) {
667f7e60 1833 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1834 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1835 conn->data.resize(conn->qlen);
667f7e60 1836 conn->bytesread=0;
09e6702a
BH
1837 }
1838 if(!bytes || bytes < 0) {
1839 if(g_logCommonErrors)
e6a9dde5 1840 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1841 t_fdm->removeReadFD(fd);
09e6702a
BH
1842 return;
1843 }
1844 }
667f7e60 1845 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 1846 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
f9d67b41 1847 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1848 if(g_logCommonErrors) {
1849 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1850 }
bb4bdbaf 1851 t_fdm->removeReadFD(fd);
09e6702a
BH
1852 return;
1853 }
b841314c 1854 conn->bytesread+=(uint16_t)bytes;
667f7e60 1855 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1856 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1857
9a864da4 1858 std::unique_ptr<DNSComboWriter> dc;
09e6702a 1859 try {
9a864da4 1860 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 1861 }
16ce7f18 1862 catch(const MOADNSException &mde) {
3ddb9247 1863 g_stats.clientParseError++;
4957a608 1864 if(g_logCommonErrors)
e6a9dde5 1865 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1866 return;
09e6702a 1867 }
cd989c87
BH
1868 dc->d_tcpConnection = conn; // carry the torch
1869 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1870 dc->d_tcp=true;
5cc8371b
RG
1871 dc->setRemote(conn->d_remote);
1872 dc->setSource(conn->d_remote);
a6147cd2 1873 ComboAddress dest;
d38e2ba9 1874 dest.reset();
a6147cd2 1875 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1876 socklen_t len = dest.getSocklen();
1877 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1878 dc->setLocal(dest);
5cc8371b 1879 dc->setDestination(dest);
33dcceba
RG
1880 DNSName qname;
1881 uint16_t qtype=0;
1882 uint16_t qclass=0;
1883 bool needECS = false;
5cc8371b 1884 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1885 string requestorId;
590388d2 1886 string deviceId;
16bbc6e3 1887 bool logQuery = false;
aa7929a3 1888#ifdef HAVE_PROTOBUF
02b47f43 1889 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1890 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1891 needECS = true;
1892 }
b773359c 1893 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
33dcceba
RG
1894#endif
1895
70fb28d9 1896 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
1897
1898 try {
29e6303a 1899 EDNSOptionViewMap ednsOptions;
5cc8371b 1900 bool xpfFound = false;
b40562da 1901 dc->d_ecsParsed = true;
5cc8371b 1902 dc->d_ecsFound = false;
2749c3fe 1903 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
1904 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1905 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 1906
70fb28d9 1907 if(t_pdl) {
33dcceba 1908 try {
70fb28d9 1909 if (t_pdl->d_gettag_ffi) {
f1c7929a 1910 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
1911 }
1912 else if (t_pdl->d_gettag) {
1913 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1914 }
33dcceba 1915 }
70fb28d9 1916 catch(const std::exception& e) {
33dcceba 1917 if(g_logCommonErrors)
e6a9dde5 1918 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1919 }
1920 }
1921 }
70fb28d9 1922 catch(const std::exception& e)
33dcceba
RG
1923 {
1924 if(g_logCommonErrors)
e6a9dde5 1925 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1926 }
1927 }
f52177c3
RG
1928
1929 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1930
33dcceba 1931#ifdef HAVE_PROTOBUF
b773359c 1932 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 1933 dc->d_requestorId = requestorId;
590388d2 1934 dc->d_deviceId = deviceId;
02b47f43 1935 dc->d_uuid = (*t_uuidGenerator)();
4898a348 1936 }
02b47f43 1937
b773359c 1938 if(t_protobufServers) {
02b47f43 1939 try {
02b47f43 1940
845cbf4c 1941 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
b773359c 1942 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 1943 }
02b47f43
RG
1944 }
1945 catch(std::exception& e) {
1946 if(g_logCommonErrors)
e6a9dde5 1947 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
1948 }
1949 }
aa7929a3 1950#endif
879b3f70 1951 if(dc->d_mdp.d_header.qr) {
048f5db6 1952 g_stats.ignoredCount++;
c0f9be19
RG
1953 if(g_logCommonErrors) {
1954 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1955 }
4957a608 1956 return;
879b3f70 1957 }
3abcdab2 1958 if(dc->d_mdp.d_header.opcode) {
048f5db6 1959 g_stats.ignoredCount++;
c0f9be19
RG
1960 if(g_logCommonErrors) {
1961 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
1962 }
c0f9be19
RG
1963 return;
1964 }
1965 else if (dh->qdcount == 0) {
1966 g_stats.emptyQueriesCount++;
1967 if(g_logCommonErrors) {
1968 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
1969 }
3abcdab2
PD
1970 return;
1971 }
09e6702a 1972 else {
4957a608
BH
1973 ++g_stats.qcounter;
1974 ++g_stats.tcpqcounter;
9a864da4 1975 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
4957a608 1976 return;
09e6702a
BH
1977 }
1978 }
1979 }
1980}
1981
6dcd28c3 1982//! Handle new incoming TCP connection
d187038c 1983static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1984{
37d3f960 1985 ComboAddress addr;
09e6702a 1986 socklen_t addrlen=sizeof(addr);
a683e8bd 1987 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1988 if(newsock>=0) {
85c32340
BH
1989 if(MT->numProcesses() > g_maxMThreads) {
1990 g_stats.overCapacityDrops++;
a7b68ae7
RG
1991 try {
1992 closesocket(newsock);
1993 }
1994 catch(const PDNSException& e) {
e6a9dde5 1995 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 1996 }
85c32340
BH
1997 return;
1998 }
1999
92011b8f 2000 if(t_remotes)
2001 t_remotes->push_back(addr);
49a699c4 2002 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 2003 if(!g_quiet)
e6a9dde5 2004 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2005
09e6702a 2006 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2007 try {
2008 closesocket(newsock);
2009 }
2010 catch(const PDNSException& e) {
e6a9dde5 2011 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2012 }
09e6702a
BH
2013 return;
2014 }
bd0289fc 2015 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2016 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2017 try {
2018 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2019 }
2020 catch(const PDNSException& e) {
e6a9dde5 2021 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2022 }
09e6702a
BH
2023 return;
2024 }
3ddb9247 2025
3897b9e1 2026 setNonBlocking(newsock);
f26bf547 2027 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 2028 tc->state=TCPConnection::BYTE0;
3ddb9247 2029
cd989c87 2030 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 2031
0bff046b 2032 struct timeval now;
c038218b 2033 Utility::gettimeofday(&now, 0);
cd989c87 2034 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
2035 }
2036}
3ddb9247 2037
d187038c 2038static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 2039{
183eb877 2040 gettimeofday(&g_now, 0);
b71b60ee 2041 struct timeval diff = g_now - tv;
2042 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2043
22cf1fda 2044 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 2045 g_stats.tooOldDrops++;
2046 return 0;
2047 }
2048
1bc3c142 2049 ++g_stats.qcounter;
d7f10541
BH
2050 if(fromaddr.sin4.sin_family==AF_INET6)
2051 g_stats.ipv6qcounter++;
1bc3c142
BH
2052
2053 string response;
93f0da94 2054 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2055 unsigned int ctag=0;
f57486f1 2056 uint32_t qhash = 0;
12aff2e5 2057 bool needECS = false;
5cc8371b 2058 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 2059 std::vector<std::string> policyTags;
5fd2577f 2060 LuaContext::LuaObject data;
5cc8371b
RG
2061 ComboAddress source = fromaddr;
2062 ComboAddress destination = destaddr;
67e31ebe 2063 string requestorId;
590388d2 2064 string deviceId;
16bbc6e3 2065 bool logQuery = false;
12aff2e5 2066#ifdef HAVE_PROTOBUF
02b47f43 2067 boost::uuids::uuid uniqueId;
02b47f43 2068 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2069 if (checkProtobufExport(luaconfsLocal)) {
4898a348 2070 uniqueId = (*t_uuidGenerator)();
02b47f43 2071 needECS = true;
63341e8d 2072 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
02b47f43
RG
2073 uniqueId = (*t_uuidGenerator)();
2074 }
b773359c
RG
2075 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2076 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
12aff2e5 2077#endif
b40562da
RG
2078 EDNSSubnetOpts ednssubnet;
2079 bool ecsFound = false;
2080 bool ecsParsed = false;
08b02366
RG
2081 uint16_t ecsBegin = 0;
2082 uint16_t ecsEnd = 0;
70fb28d9
RG
2083 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2084 bool variable = false;
1bc3c142 2085 try {
02b47f43
RG
2086 DNSName qname;
2087 uint16_t qtype=0;
2088 uint16_t qclass=0;
1bc3c142 2089 uint32_t age;
c15ff3df 2090 bool qnameParsed=false;
8f7473d7 2091#ifdef MALLOC_TRACE
2092 /*
2093 static uint64_t last=0;
2094 if(!last)
2095 g_mtracer->clearAllocators();
2096 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2097 last=g_mtracer->getAllocs();
2098 cout<<g_mtracer->topAllocatorsString()<<endl;
2099 g_mtracer->clearAllocators();
2100 */
2101#endif
55a1378f 2102
70fb28d9 2103 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2104 try {
29e6303a 2105 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2106 bool xpfFound = false;
2107
2108 ecsFound = false;
2109
2110 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2111 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2112 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2113
c15ff3df
RG
2114 qnameParsed = true;
2115 ecsParsed = true;
12aff2e5 2116
70fb28d9 2117 if(t_pdl) {
12aff2e5 2118 try {
70fb28d9 2119 if (t_pdl->d_gettag_ffi) {
f1c7929a 2120 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable, logQuery);
70fb28d9
RG
2121 }
2122 else if (t_pdl->d_gettag) {
2123 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
2124 }
12aff2e5 2125 }
70fb28d9 2126 catch(const std::exception& e) {
12aff2e5 2127 if(g_logCommonErrors)
e6a9dde5 2128 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2129 }
8ea8c302 2130 }
b2eacd67 2131 }
70fb28d9 2132 catch(const std::exception& e)
b2eacd67 2133 {
2134 if(g_logCommonErrors)
e6a9dde5 2135 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2136 }
12ce523e 2137 }
3ddb9247 2138
02b47f43 2139 bool cacheHit = false;
1fbc6dc5 2140 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2141#ifdef HAVE_PROTOBUF
b773359c 2142 if (t_protobufServers) {
d362f7c1 2143 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2144 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2145 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
b773359c 2146 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
b790ef3d 2147 }
d9d3f9c1
RG
2148 }
2149#endif /* HAVE_PROTOBUF */
02b47f43 2150
70fb28d9
RG
2151 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2152 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2153 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2154 vState valState;
c15ff3df 2155 if (qnameParsed) {
08b02366 2156 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2157 }
2158 else {
08b02366 2159 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2160 }
2161
d9d3f9c1 2162 if (cacheHit) {
8467ec26
KM
2163 if(valState == Bogus) {
2164 if(t_bogusremotes)
2165 t_bogusremotes->push_back(source);
2166 if(t_bogusqueryring)
2167 t_bogusqueryring->push_back(make_pair(qname, qtype));
2168 }
2169
d9d3f9c1 2170#ifdef HAVE_PROTOBUF
b773359c 2171 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2172 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2173 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2174 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2175 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2176 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2177 pbMessage->setRequestorId(requestorId);
2178 pbMessage->setDeviceId(deviceId);
b773359c 2179 protobufLogResponse(*pbMessage);
02b47f43 2180 }
d9d3f9c1 2181#endif /* HAVE_PROTOBUF */
49a3500d 2182 if(!g_quiet)
e6a9dde5 2183 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2184
1bc3c142
BH
2185 g_stats.packetCacheHits++;
2186 SyncRes::s_queries++;
2187 ageDNSPacket(response, age);
b71b60ee 2188 struct msghdr msgh;
2189 struct iovec iov;
2190 char cbuf[256];
2191 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2192 msgh.msg_control=NULL;
2193
cbc03320 2194 if(g_fromtosockets.count(fd)) {
fbe2a2e0 2195 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 2196 }
cbc03320 2197 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2198 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2199
97bee66d 2200 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2201 struct dnsheader tmpdh;
2202 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2203 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2204 }
08f3f638 2205 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2206 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2207 return 0;
2208 }
3ddb9247 2209 }
1bc3c142 2210 catch(std::exception& e) {
e6a9dde5 2211 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2212 return 0;
2213 }
3ddb9247 2214
f26bf547 2215 if(t_pdl) {
5cc8371b 2216 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2217 if(!g_quiet)
e6a9dde5 2218 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2219 g_stats.policyDrops++;
2220 return 0;
2221 }
2222 }
2223
1bc3c142 2224 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2225 if(!g_quiet)
e6a9dde5 2226 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2227
1bc3c142
BH
2228 g_stats.overCapacityDrops++;
2229 return 0;
2230 }
3ddb9247 2231
9a864da4 2232 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
1bc3c142 2233 dc->setSocket(fd);
49a3500d 2234 dc->d_tag=ctag;
e9f63d47 2235 dc->d_qhash=qhash;
5cc8371b
RG
2236 dc->setRemote(fromaddr);
2237 dc->setSource(source);
b71b60ee 2238 dc->setLocal(destaddr);
5cc8371b 2239 dc->setDestination(destination);
1bc3c142 2240 dc->d_tcp=false;
b40562da
RG
2241 dc->d_ecsFound = ecsFound;
2242 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2243 dc->d_ecsBegin = ecsBegin;
2244 dc->d_ecsEnd = ecsEnd;
b40562da 2245 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2246 dc->d_ttlCap = ttlCap;
2247 dc->d_variable = variable;
aa7929a3 2248#ifdef HAVE_PROTOBUF
b773359c 2249 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2250 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2251 }
67e31ebe 2252 dc->d_requestorId = requestorId;
590388d2 2253 dc->d_deviceId = deviceId;
aa7929a3
RG
2254#endif
2255
9a864da4 2256 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2257 return 0;
3ddb9247
PD
2258}
2259
b71b60ee 2260
d187038c 2261static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2262{
a683e8bd 2263 ssize_t len;
12c2f2b9 2264 static const size_t maxIncomingQuerySize = 512;
04896b99 2265 static thread_local std::string data;
5db529f8 2266 ComboAddress fromaddr;
b71b60ee 2267 struct msghdr msgh;
2268 struct iovec iov;
2269 char cbuf[256];
390f1dab 2270 bool firstQuery = true;
b71b60ee 2271
c0a00acd
RG
2272 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2273 data.resize(maxIncomingQuerySize);
2274 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2275 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2276
c0a00acd 2277 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2278
c0a00acd 2279 firstQuery = false;
390f1dab 2280
c0a00acd
RG
2281 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2282 g_stats.ignoredCount++;
2283 if (!g_quiet) {
2284 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2285 }
2286 return;
04896b99 2287 }
04896b99 2288
c0a00acd
RG
2289 if (msgh.msg_flags & MSG_TRUNC) {
2290 g_stats.truncatedDrops++;
2291 if (!g_quiet) {
2292 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2293 }
2294 return;
ba892c7f 2295 }
b23b8614 2296
c0a00acd
RG
2297 if(t_remotes) {
2298 t_remotes->push_back(fromaddr);
2299 }
81859ba5 2300
c0a00acd
RG
2301 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2302 if(!g_quiet) {
2303 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2304 }
3ddb9247 2305
c0a00acd
RG
2306 g_stats.unauthorizedUDP++;
2307 return;
5db529f8 2308 }
c0a00acd
RG
2309 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2310 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2311 if(!g_quiet) {
2312 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2313 }
2314
2315 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2316 return;
3abcdab2 2317 }
c0a00acd
RG
2318
2319 try {
2320 data.resize(static_cast<size_t>(len));
2321 dnsheader* dh=(dnsheader*)&data[0];
2322
2323 if(dh->qr) {
2324 g_stats.ignoredCount++;
2325 if(g_logCommonErrors) {
2326 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2327 }
2328 }
2329 else if(dh->opcode) {
2330 g_stats.ignoredCount++;
2331 if(g_logCommonErrors) {
2332 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2333 }
a6147cd2 2334 }
c0f9be19
RG
2335 else if (dh->qdcount == 0) {
2336 g_stats.emptyQueriesCount++;
2337 if(g_logCommonErrors) {
2338 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2339 }
2340 }
a6147cd2 2341 else {
c0a00acd
RG
2342 struct timeval tv={0,0};
2343 HarvestTimestamp(&msgh, &tv);
2344 ComboAddress dest;
2345 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2346 auto loc = rplookup(g_listenSocketsAddresses, fd);
2347 if(HarvestDestinationAddress(&msgh, &dest)) {
2348 // but.. need to get port too
2349 if(loc) {
2350 dest.sin4.sin_port = loc->sin4.sin_port;
2351 }
a6147cd2 2352 }
2353 else {
c0a00acd
RG
2354 if(loc) {
2355 dest = *loc;
2356 }
2357 else {
2358 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2359 socklen_t slen = dest.getSocklen();
2360 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2361 }
2362 }
2363
2364 if(g_weDistributeQueries) {
2365 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2366 }
2367 else {
2368 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2369 }
2370 }
c0a00acd 2371 }
16ce7f18 2372 catch(const MOADNSException &mde) {
c0a00acd
RG
2373 g_stats.clientParseError++;
2374 if(g_logCommonErrors) {
2375 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2376 }
2377 }
2378 catch(const std::runtime_error& e) {
2379 g_stats.clientParseError++;
2380 if(g_logCommonErrors) {
2381 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2382 }
5db529f8
BH
2383 }
2384 }
c0a00acd
RG
2385 else {
2386 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2387 if(firstQuery && errno == EAGAIN) {
2388 g_stats.noPacketError++;
2389 }
390f1dab 2390
c0a00acd
RG
2391 break;
2392 }
ac0e821b 2393 }
5db529f8
BH
2394}
2395
adb6cd72 2396static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2397{
37d3f960 2398 int fd;
f28307ad 2399 vector<string>locals;
2e3d8a19 2400 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2401
f28307ad 2402 if(locals.empty())
3f81d239 2403 throw PDNSException("No local address specified");
3ddb9247 2404
f28307ad 2405 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2406 ServiceTuple st;
2407 st.port=::arg().asNum("local-port");
2408 parseService(*i, st);
3ddb9247 2409
32252594
BH
2410 ComboAddress sin;
2411
d38e2ba9 2412 sin.reset();
37d3f960 2413 sin.sin4.sin_family = AF_INET;
32252594 2414 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2415 sin.sin6.sin6_family = AF_INET6;
f71bc087 2416 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2417 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2418 }
2419
2420 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2421 if(fd<0)
3f81d239 2422 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2423
3897b9e1 2424 setCloseOnExec(fd);
a903b39c 2425
f28307ad 2426 int tmp=1;
810ff705 2427 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2428 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2429 exit(1);
f28307ad 2430 }
0dfa94ab 2431 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2432 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2433 }
2434
c8ddb7c2 2435#ifdef TCP_DEFER_ACCEPT
38ac0821 2436 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2437 if(i==locals.begin())
377602e3 2438 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2439 }
2440#endif
2441
fec7dd5a
SS
2442 if( ::arg().mustDo("non-local-bind") )
2443 Utility::setBindAny(AF_INET, fd);
2444
2332f42d 2445#ifdef SO_REUSEPORT
810ff705
RG
2446 if(g_reusePort) {
2447 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2448 throw PDNSException("SO_REUSEPORT: "+stringerror());
2449 }
2450#endif
2451
0735b17e
RG
2452 if (::arg().asNum("tcp-fast-open") > 0) {
2453#ifdef TCP_FASTOPEN
2454 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2455 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2456 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2457 }
2458#else
e6a9dde5 2459 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2460#endif
2461 }
2462
32252594 2463 sin.sin4.sin_port = htons(st.port);
a683e8bd 2464 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2465 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2466 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2467
3897b9e1 2468 setNonBlocking(fd);
49a699c4 2469 setSocketSendBuffer(fd, 65000);
37d3f960 2470 listen(fd, 128);
b243ca3b 2471 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2472 tcpSockets.insert(fd);
2473
84433b79 2474 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2475 // - fd is not that which we know here, but returned from accept()
3ddb9247 2476 if(sin.sin4.sin_family == AF_INET)
377602e3 2477 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2478 else
377602e3 2479 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2480 }
9c495589
BH
2481}
2482
b243ca3b 2483static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2484{
fec7dd5a 2485 int one=1;
f28307ad 2486 vector<string>locals;
2e3d8a19 2487 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2488
f28307ad 2489 if(locals.empty())
3f81d239 2490 throw PDNSException("No local address specified");
3ddb9247 2491
f28307ad 2492 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2493 ServiceTuple st;
2494 st.port=::arg().asNum("local-port");
2495 parseService(*i, st);
2496
37d3f960 2497 ComboAddress sin;
996c89cc 2498
d38e2ba9 2499 sin.reset();
37d3f960 2500 sin.sin4.sin_family = AF_INET;
32252594 2501 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2502 sin.sin6.sin6_family = AF_INET6;
f71bc087 2503 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2504 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2505 }
3ddb9247 2506
bb4bdbaf 2507 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2508 if(fd < 0) {
3f81d239 2509 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2510 }
915b0c39 2511 if (!setSocketTimestamps(fd))
e6a9dde5 2512 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2513
b71b60ee 2514 if(IsAnyAddress(sin)) {
cbc03320 2515 if(sin.sin4.sin_family == AF_INET)
2516 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2517 g_fromtosockets.insert(fd);
757d3179 2518#ifdef IPV6_RECVPKTINFO
cbc03320 2519 if(sin.sin4.sin_family == AF_INET6)
2520 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2521 g_fromtosockets.insert(fd);
757d3179 2522#endif
0dfa94ab 2523 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2524 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2525 }
b71b60ee 2526 }
fec7dd5a
SS
2527 if( ::arg().mustDo("non-local-bind") )
2528 Utility::setBindAny(AF_INET6, fd);
2529
3897b9e1 2530 setCloseOnExec(fd);
a903b39c 2531
4e9a20e6 2532 setSocketReceiveBuffer(fd, 250000);
32252594 2533 sin.sin4.sin_port = htons(st.port);
37d3f960 2534
2332f42d 2535
2573d4a6 2536#ifdef SO_REUSEPORT
810ff705 2537 if(g_reusePort) {
2332f42d 2538 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2539 throw PDNSException("SO_REUSEPORT: "+stringerror());
2540 }
2541#endif
a683e8bd 2542 socklen_t socklen=sin.getSocklen();
3ddb9247 2543 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2544 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2545
3897b9e1 2546 setNonBlocking(fd);
c2136bf0 2547
b243ca3b 2548 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2549 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2550 if(sin.sin4.sin_family == AF_INET)
377602e3 2551 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2552 else
377602e3 2553 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2554 }
c836dc19 2555}
caa6eefa 2556
d187038c 2557static void daemonize(void)
c836dc19
BH
2558{
2559 if(fork())
2560 exit(0); // bye bye
3ddb9247
PD
2561
2562 setsid();
c836dc19 2563
27a5ead5 2564 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2565 if(i < 0)
e6a9dde5 2566 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2567 else {
2568 dup2(i,0); /* stdin */
2569 dup2(i,1); /* stderr */
2570 dup2(i,2); /* stderr */
2571 close(i);
2572 }
288f4aa9 2573}
caa6eefa 2574
d187038c 2575static void usr1Handler(int)
c75a6a9e
BH
2576{
2577 statsWanted=true;
2578}
ae1b2e98 2579
d187038c 2580static void usr2Handler(int)
9170fbaf 2581{
f1f34cc2 2582 g_quiet= !g_quiet;
2583 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2584 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2585}
2586
d187038c 2587static void doStats(void)
c75a6a9e 2588{
16beeaa4
BH
2589 static time_t lastOutputTime;
2590 static uint64_t lastQueryCount;
d299d4f5 2591
2592 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2593 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2594
d299d4f5 2595 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2596 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2597 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2598 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2599 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2600
e6a9dde5 2601 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2602 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2603 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2604 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2605 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2606 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2607 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2608 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2609
e6a9dde5 2610 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2611 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2612
e6a9dde5 2613 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2614 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2615
16beeaa4
BH
2616 time_t now = time(0);
2617 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2618 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2619 }
2620 lastOutputTime = now;
2621 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2622 }
3ddb9247 2623 else if(statsWanted)
e6a9dde5 2624 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2625
c75a6a9e
BH
2626 statsWanted=false;
2627}
c836dc19 2628
29f0b1ce 2629static void houseKeeping(void *)
c836dc19 2630{
e4ae55e5 2631 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
3337c2f7
RG
2632 static thread_local int cleanCounter=0;
2633 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
2634 auto luaconfsLocal = g_luaconfs.getLocal();
2635
2636 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2637 // Loading the Lua config file already "refreshed" the TAs
2638 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2639 }
2640
cc59bce6 2641 try {
2642 if(s_running)
2643 return;
2644 s_running=true;
3ddb9247 2645
cc59bce6 2646 struct timeval now;
2647 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2648
2649 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 2650 DTime dt;
2651 dt.setTimeval(now);
a6f7f5fe 2652 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2653 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2654
a6f7f5fe 2655 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2656
cc59bce6 2657 if(!((cleanCounter++)%40)) { // this is a full scan!
2658 time_t limit=now.tv_sec-300;
a712cb56 2659 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2660 }
2661 last_prune=time(0);
d67620e4 2662 }
3ddb9247 2663
cc59bce6 2664 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2665 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2666 if (!res)
2667 last_rootupdate=now.tv_sec;
cc59bce6 2668 }
3ddb9247 2669
b243ca3b 2670 if(isHandlerThread()) {
3ddb9247 2671
cc59bce6 2672 if(now.tv_sec - last_secpoll >= 3600) {
2673 try {
2674 doSecPoll(&last_secpoll);
2675 }
581d4ea3 2676 catch(std::exception& e)
2677 {
e6a9dde5 2678 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2679 }
47e9b74f 2680 catch(PDNSException& e)
2681 {
e6a9dde5 2682 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2683 }
d0992a65
CH
2684 catch(ImmediateServFailException &e)
2685 {
e6a9dde5 2686 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2687 }
47e9b74f 2688 catch(...)
2689 {
e6a9dde5 2690 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2691 }
18b73338 2692 }
e4ae55e5
PL
2693
2694 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2695 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2696 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2697 try {
2698 map<DNSName, dsmap_t> dsAnchors;
2699 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2700 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2701 lci.dsAnchors = dsAnchors;
2702 });
2703 }
2704 last_trustAnchorUpdate = now.tv_sec;
2705 } catch (const PDNSException &pe) {
2706 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2707 }
2708 }
2709 s_running=false;
d67620e4 2710 }
2711 }
cc59bce6 2712 catch(PDNSException& ae)
2713 {
2714 s_running=false;
e6a9dde5 2715 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2716 throw;
2717 }
779828c4 2718}
d6d5dea7 2719
d187038c 2720static void makeThreadPipes()
49a699c4 2721{
b243ca3b
RG
2722 /* thread 0 is the handler / SNMP, we start at 1 */
2723 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2724 auto& threadInfos = s_threadInfos.at(n);
2725
49a699c4
BH
2726 int fd[2];
2727 if(pipe(fd) < 0)
2728 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2729
b243ca3b
RG
2730 threadInfos.pipes.readToThread = fd[0];
2731 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2732
49a699c4
BH
2733 if(pipe(fd) < 0)
2734 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2735
2736 threadInfos.pipes.readFromThread = fd[0];
2737 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2738
cf8cda18
RG
2739 if(pipe(fd) < 0)
2740 unixDie("Creating pipe for inter-thread communications");
d10307c5 2741
b243ca3b
RG
2742 threadInfos.pipes.readQueriesToThread = fd[0];
2743 threadInfos.pipes.writeQueriesToThread = fd[1];
2744
2745 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2746 unixDie("Making pipe for inter-thread communications non-blocking");
2747 }
49a699c4
BH
2748 }
2749}
2750
00c9b8c1
BH
2751struct ThreadMSG
2752{
2753 pipefunc_t func;
2754 bool wantAnswer;
2755};
2756
b4e76a18 2757void broadcastFunction(const pipefunc_t& func)
49a699c4 2758{
b243ca3b
RG
2759 /* This function might be called by the worker with t_id 0 during startup
2760 for the initialization of ACLs and domain maps. After that it should only
2761 be called by the handler. */
d77abca1 2762
b243ca3b
RG
2763 if (s_threadInfos.empty() && isHandlerThread()) {
2764 /* the handler and distributors will call themselves below, but
2765 during startup we get called while s_threadInfos has not been
2766 populated yet to update the ACL or domain maps, so we need to
2767 handle that case.
2768 */
2769 func();
2770 }
b4e76a18 2771
b243ca3b
RG
2772 unsigned int n = 0;
2773 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2774 if(n++ == t_id) {
b4e76a18 2775 func(); // don't write to ourselves!
49a699c4
BH
2776 continue;
2777 }
3ddb9247 2778
00c9b8c1
BH
2779 ThreadMSG* tmsg = new ThreadMSG();
2780 tmsg->func = func;
2781 tmsg->wantAnswer = true;
b243ca3b 2782 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2783 delete tmsg;
b243ca3b 2784
49a699c4 2785 unixDie("write to thread pipe returned wrong size or error");
b841314c 2786 }
3ddb9247 2787
49467864 2788 string* resp = nullptr;
b243ca3b 2789 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2790 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2791
49a699c4 2792 if(resp) {
49a699c4 2793 delete resp;
49467864 2794 resp = nullptr;
49a699c4
BH
2795 }
2796 }
2797}
06ea9015 2798
b243ca3b 2799// This function is only called by the distributor threads, when pdns-distributes-queries is set
8171ab83 2800void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 2801{
b243ca3b 2802 if (!isDistributorThread()) {
d77abca1
RG
2803 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2804 exit(1);
2805 }
2806
8171ab83 2807 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
b243ca3b 2808 unsigned int target = /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
06ea9015 2809
b243ca3b
RG
2810 const auto& targetInfo = s_threadInfos[target];
2811 if(!targetInfo.isWorker) {
2812 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2813 exit(1);
00c9b8c1 2814 }
d77abca1 2815
b243ca3b 2816 const auto& tps = targetInfo.pipes;
00c9b8c1
BH
2817 ThreadMSG* tmsg = new ThreadMSG();
2818 tmsg->func = func;
2819 tmsg->wantAnswer = false;
3ddb9247 2820
cf8cda18
RG
2821 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2822 if (written > 0) {
2823 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2824 delete tmsg;
2825 unixDie("write to thread pipe returned wrong size or error");
2826 }
2827 }
2828 else {
2829 int error = errno;
b841314c 2830 delete tmsg;
cf8cda18
RG
2831 if (error == EAGAIN || error == EWOULDBLOCK) {
2832 g_stats.queryPipeFullDrops++;
2833 } else {
17634427 2834 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2835 }
b841314c 2836 }
00c9b8c1 2837}
3427fa8a 2838
d187038c 2839static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2840{
f26bf547 2841 ThreadMSG* tmsg = nullptr;
3ddb9247 2842
cf8cda18 2843 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
2844 unixDie("read from thread pipe returned wrong size or error");
2845 }
3ddb9247 2846
2f22827a 2847 void *resp=0;
2848 try {
2849 resp = tmsg->func();
2850 }
2851 catch(std::exception& e) {
6d2010a8 2852 if(g_logCommonErrors)
e6a9dde5 2853 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2854 }
2855 catch(PDNSException& e) {
6d2010a8 2856 if(g_logCommonErrors)
e6a9dde5 2857 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2858 }
d7c676a5 2859 if(tmsg->wantAnswer) {
b243ca3b
RG
2860 const auto& threadInfo = s_threadInfos.at(t_id);
2861 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 2862 delete tmsg;
00c9b8c1 2863 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
2864 }
2865 }
3ddb9247 2866
00c9b8c1 2867 delete tmsg;
49a699c4 2868}
09e6702a 2869
13034931
BH
2870template<class T> void *voider(const boost::function<T*()>& func)
2871{
2872 return func();
2873}
2874
b3b5459d
BH
2875vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2876{
2877 a.insert(a.end(), b.begin(), b.end());
2878 return a;
2879}
2880
/* Append all (string, number) pairs of b to a and return a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());

  return a;
}
2886
3ddb9247
PD
2887vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2888{
2889 a.insert(a.end(), b.begin(), b.end());
2890 return a;
2891}
2892
92011b8f 2893
387b9ca6
RG
2894/*
2895 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
2896 reload the Lua script (not the Lua config) or change the current trace regex,
2897 and by the SNMP thread to gather metrics. */
b4e76a18 2898template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 2899{
b243ca3b 2900 if (!isHandlerThread()) {
788eeb4c 2901 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 2902 exit(1);
d77abca1
RG
2903 }
2904
b243ca3b 2905 unsigned int n = 0;
3427fa8a 2906 T ret=T();
b243ca3b
RG
2907 for (const auto& threadInfo : s_threadInfos) {
2908 if (n++ == t_id) {
2909 continue;
2910 }
2911
2912 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
2913 ThreadMSG* tmsg = new ThreadMSG();
2914 tmsg->func = boost::bind(voider<T>, func);
2915 tmsg->wantAnswer = true;
3ddb9247 2916
b841314c
RG
2917 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2918 delete tmsg;
3427fa8a 2919 unixDie("write to thread pipe returned wrong size or error");
b841314c 2920 }
3ddb9247 2921
49467864 2922 T* resp = nullptr;
3427fa8a
BH
2923 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2924 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2925
3427fa8a 2926 if(resp) {
3427fa8a
BH
2927 ret += *resp;
2928 delete resp;
49467864 2929 resp = nullptr;
3427fa8a
BH
2930 }
2931 }
2932 return ret;
2933}
2934
b4e76a18
RG
2935template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
2936template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
2937template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
2938template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3427fa8a 2939
d187038c 2940static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2941{
2942 string remote;
2943 string msg=s_rcc.recv(&remote);
2944 RecursorControlParser rcp;
2945 RecursorControlParser::func_t* command;
3ddb9247 2946
09e6702a 2947 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2948
2949 // If we are inside a chroot, we need to strip
2950 if (!arg()["chroot"].empty()) {
a683e8bd 2951 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2952 remote = remote.substr(len);
2953 }
2954
ab5c053d
BH
2955 try {
2956 s_rcc.send(answer, &remote);
2957 command();
2958 }
fdbf35ac 2959 catch(std::exception& e) {
e6a9dde5 2960 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 2961 }
3f81d239 2962 catch(PDNSException& ae) {
e6a9dde5 2963 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 2964 }
09e6702a
BH
2965}
2966
d187038c 2967static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2968{
0b18b22e 2969 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2970 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2971
667f7e60 2972 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2973
a683e8bd 2974 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2975 if(ret > 0) {
667f7e60 2976 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2977 pident->inNeeded-=(size_t)ret;
825fa717 2978 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2979 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2980 PacketID pid=*pident;
2981 string msg=pident->inMSG;
3ddb9247 2982
bb4bdbaf 2983 t_fdm->removeReadFD(fd);
3ddb9247 2984 MT->sendEvent(pid, &msg);
09e6702a
BH
2985 }
2986 else {
667f7e60 2987 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2988 }
2989 }
2990 else {
667f7e60 2991 PacketID tmp=*pident;
bb4bdbaf 2992 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2993 string empty;
2994 MT->sendEvent(tmp, &empty); // this conveys error status
2995 }
2996}
2997
d187038c 2998static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2999{
0b18b22e 3000 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3001 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3002 if(ret > 0) {
a683e8bd 3003 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3004 if(pid->outPos==pid->outMSG.size()) {
3005 PacketID tmp=*pid;
bb4bdbaf 3006 t_fdm->removeWriteFD(fd);
09e6702a
BH
3007 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3008 }
3009 }
3010 else { // error or EOF
667f7e60 3011 PacketID tmp(*pid);
bb4bdbaf 3012 t_fdm->removeWriteFD(fd);
09e6702a 3013 string sent;
998a4334 3014 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3015 }
3016}
3017
34801ab1 3018// resend event to everybody chained onto it
d187038c 3019static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3020{
3021 if(iter->key.chain.empty())
3022 return;
e27e91a8 3023 // cerr<<"doResends called!\n";
34801ab1
BH
3024 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3025 resend.fd=-1;
3026 resend.id=*i;
e27e91a8 3027 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3028
34801ab1
BH
3029 MT->sendEvent(resend, &content);
3030 g_stats.chainResends++;
34801ab1
BH
3031 }
3032}
3033
d187038c 3034static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3035{
600fc20b 3036 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3037 ssize_t len;
fae8fe07
RG
3038 std::string packet;
3039 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3040 ComboAddress fromaddr;
09e6702a
BH
3041 socklen_t addrlen=sizeof(fromaddr);
3042
fae8fe07 3043 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3044
a683e8bd 3045 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3046 if(len < 0)
996c89cc 3047 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3048 else {
3ddb9247 3049 g_stats.serverParseError++;
09e6702a 3050 if(g_logCommonErrors)
e6a9dde5 3051 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3052 ": packet smaller than DNS header"<<endl;
998a4334 3053 }
34801ab1 3054
49a699c4 3055 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3056 string empty;
3057
3058 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3059 if(iter != MT->d_waiters.end())
34801ab1 3060 doResends(iter, pid, empty);
3ddb9247 3061
34801ab1 3062 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3063 return;
3ddb9247 3064 }
998a4334 3065
fae8fe07 3066 packet.resize(len);
998a4334 3067 dnsheader dh;
fae8fe07 3068 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3069
6da3b3ad
PD
3070 PacketID pident;
3071 pident.remote=fromaddr;
3072 pident.id=dh.id;
3073 pident.fd=fd;
34801ab1 3074
33a928af 3075 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3076 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3077 }
3078
3079 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3080 !dh.qr) { // one weird server
3081 pident.domain.clear();
3082 pident.type = 0;
3083 }
3084 else {
3085 try {
0b31e67e 3086 if(len > 12)
fae8fe07 3087 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3088 }
3089 catch(std::exception& e) {
3090 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3091 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3092 return;
34801ab1 3093 }
6da3b3ad 3094 }
34801ab1 3095
6da3b3ad
PD
3096 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3097 if(iter != MT->d_waiters.end()) {
3098 doResends(iter, pident, packet);
3099 }
c1da7976 3100
6da3b3ad 3101retryWithName:
4957a608 3102
6da3b3ad
PD
3103 if(!MT->sendEvent(pident, &packet)) {
3104 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3105 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3106 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3107 pident.domain == mthread->key.domain) {
6da3b3ad 3108 mthread->key.nearMisses++;
998a4334 3109 }
6da3b3ad
PD
3110
3111 // be a bit paranoid here since we're weakening our matching
3ddb9247 3112 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3113 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3114 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3115 pident.domain = mthread->key.domain;
3116 pident.type = mthread->key.type;
3117 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3118 }
09e6702a 3119 }
6da3b3ad
PD
3120 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3121 if(g_logCommonErrors) {
e6a9dde5 3122 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3123 }
09e6702a 3124 }
6da3b3ad
PD
3125 else if(fd >= 0) {
3126 t_udpclientsocks->returnSocket(fd);
3127 }
09e6702a
BH
3128}
3129
1f4abb20
BH
3130FDMultiplexer* getMultiplexer()
3131{
3132 FDMultiplexer* ret;
f26bf547 3133 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3134 try {
f26bf547 3135 ret=i.second();
1f4abb20
BH
3136 return ret;
3137 }
98d0ee4a 3138 catch(FDMultiplexerException &fe) {
e6a9dde5 3139 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3140 }
3141 catch(...) {
e6a9dde5 3142 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3143 }
1f4abb20 3144 }
e6a9dde5 3145 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3146 exit(1);
3147}
3148
3ddb9247 3149
d187038c 3150static string* doReloadLuaScript()
4485aa35 3151{
674cf0f6 3152 string fname= ::arg()["lua-dns-script"];
4485aa35 3153 try {
674cf0f6 3154 if(fname.empty()) {
f26bf547 3155 t_pdl.reset();
377602e3 3156 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3157 return new string("unloaded\n");
4485aa35
BH
3158 }
3159 else {
9694e14f
AT
3160 t_pdl = std::make_shared<RecursorLua4>();
3161 t_pdl->loadFile(fname);
4485aa35
BH
3162 }
3163 }
fdbf35ac 3164 catch(std::exception& e) {
e6a9dde5 3165 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3166 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3167 }
3ddb9247 3168
e6a9dde5 3169 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3170 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3171}
3172
49a699c4
BH
3173string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3174{
3ddb9247 3175 if(begin != end)
49a699c4 3176 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3177
0f39c1a3 3178 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3179}
49a699c4 3180
d187038c 3181static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3182try
3183{
3184 if(newRegex.empty()) {
f26bf547 3185 t_traceRegex.reset();
77499b05
BH
3186 return new string("unset\n");
3187 }
3188 else {
f26bf547 3189 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3190 return new string("ok\n");
3191 }
3192}
3f81d239 3193catch(PDNSException& ae)
77499b05
BH
3194{
3195 return new string(ae.reason+"\n");
3196}
3197
3198string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3199{
3200 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3201}
3202
4e9a20e6 3203static void checkLinuxIPv6Limits()
3204{
3205#ifdef __linux__
3206 string line;
3207 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3208 int lim=std::stoi(line);
4e9a20e6 3209 if(lim < 16384) {
e6a9dde5 3210 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3211 }
3212 }
3213#endif
3214}
36849ff2 3215static void checkOrFixFDS()
4e9a20e6 3216{
c0063e60 3217 unsigned int availFDs=getFilenumLimit();
3218 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3219
3220 if(wantFDs > availFDs) {
067ad20e 3221 unsigned int hardlimit= getFilenumLimit(true);
3222 if(hardlimit >= wantFDs) {
c0063e60 3223 setFilenumLimit(wantFDs);
e6a9dde5 3224 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3225 }
3226 else {
067ad20e 3227 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3228 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3229 g_maxMThreads = newval;
067ad20e 3230 setFilenumLimit(hardlimit);
36849ff2 3231 }
3232 }
4e9a20e6 3233}
77499b05 3234
c390b2da 3235static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3236
f26bf547 3237static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3238{
3239 t_allowFrom = ng;
f26bf547 3240 return nullptr;
49a699c4
BH
3241}
3242
dbd23fc2
BH
3243int g_argc;
3244char** g_argv;
3245
18af64a8 3246void parseACLs()
f7c1d4e3 3247{
18af64a8 3248 static bool l_initialized;
3ddb9247 3249
49a699c4 3250 if(l_initialized) { // only reload configuration file on second call
18af64a8 3251 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3252 if(::arg()["config-name"]!="") {
3253 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3254 }
18af64a8 3255 cleanSlashes(configname);
3ddb9247
PD
3256
3257 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3258 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3259 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3260 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3261 ::arg().preParse(g_argc, g_argv, "include-dir");
3262
3263 // then process includes
3264 std::vector<std::string> extraConfigs;
242b90e1
AT
3265 ::arg().gatherIncludes(extraConfigs);
3266
1dc8f4d0 3267 for(const std::string& fn : extraConfigs) {
7e818521 3268 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3269 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3270 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3271 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3272 }
ca2c884c
AT
3273
3274 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3275 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3276 }
49a699c4 3277
f26bf547
RG
3278 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3279 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3280
2c95fc65
BH
3281 if(!::arg()["allow-from-file"].empty()) {
3282 string line;
2c95fc65
BH
3283 ifstream ifs(::arg()["allow-from-file"].c_str());
3284 if(!ifs) {
9c61b9d0 3285 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3286 }
3287
3288 string::size_type pos;
3289 while(getline(ifs,line)) {
3290 pos=line.find('#');
3291 if(pos!=string::npos)
3292 line.resize(pos);
3293 trim(line);
3294 if(line.empty())
3295 continue;
3296
18af64a8 3297 allowFrom->addMask(line);
2c95fc65 3298 }
e6a9dde5 3299 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3300 }
3301 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3302 vector<string> ips;
3303 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3304
e6a9dde5 3305 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3306 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3307 allowFrom->addMask(*i);
f7c1d4e3 3308 if(i!=ips.begin())
e6a9dde5
PL
3309 g_log<<Logger::Warning<<", ";
3310 g_log<<Logger::Warning<<*i;
f7c1d4e3 3311 }
e6a9dde5 3312 g_log<<Logger::Warning<<endl;
f7c1d4e3 3313 }
49a699c4 3314 else {
3ddb9247 3315 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3316 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3317 allowFrom = nullptr;
49a699c4 3318 }
3ddb9247 3319
49a699c4 3320 g_initialAllowFrom = allowFrom;
d7dae798 3321 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3322 oldAllowFrom = nullptr;
3ddb9247 3323
49a699c4 3324 l_initialized = true;
18af64a8
BH
3325}
3326
795215f2 3327
756e82cf 3328static void setupDelegationOnly()
3329{
3330 vector<string> parts;
3331 stringtok(parts, ::arg()["delegation-only"], ", \t");
3332 for(const auto& p : parts) {
9065eb05 3333 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3334 }
3335}
795215f2 3336
8fd25133
RG
3337static std::map<unsigned int, std::set<int> > parseCPUMap()
3338{
3339 std::map<unsigned int, std::set<int> > result;
3340
3341 const std::string value = ::arg()["cpu-map"];
3342
3343 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3344 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3345 return result;
3346 }
3347
3348 std::vector<std::string> parts;
3349
3350 stringtok(parts, value, " \t");
3351
3352 for(const auto& part : parts) {
3353 if (part.find('=') == string::npos)
3354 continue;
3355
3356 try {
3357 auto headers = splitField(part, '=');
3358 trim(headers.first);
3359 trim(headers.second);
3360
3361 unsigned int threadId = pdns_stou(headers.first);
3362 std::vector<std::string> cpus;
3363
3364 stringtok(cpus, headers.second, ",");
3365
3366 for(const auto& cpu : cpus) {
3367 int cpuId = std::stoi(cpu);
3368
3369 result[threadId].insert(cpuId);
3370 }
3371 }
3372 catch(const std::exception& e) {
e6a9dde5 3373 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3374 }
3375 }
3376
3377 return result;
3378}
3379
3380static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3381{
3382 const auto& cpuMapping = cpusMap.find(n);
3383 if (cpuMapping != cpusMap.cend()) {
3384 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3385 if (rc == 0) {
e6a9dde5 3386 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3387 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3388 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3389 }
e6a9dde5 3390 g_log<<Logger::Info<<endl;
8fd25133
RG
3391 }
3392 else {
e6a9dde5 3393 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3394 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3395 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3396 }
e6a9dde5 3397 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3398 }
3399 }
3400}
3401
af1377b7
NC
3402#ifdef NOD_ENABLED
3403static void setupNODThread()
3404{
3405 if (g_nodEnabled) {
b78727c6
NC
3406 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3407 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3408 try {
3409 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3410 }
3411 catch (const PDNSException& e) {
3412 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3413 _exit(1);
3414 }
3415 if (!t_nodDBp->init()) {
3416 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3417 _exit(1);
3418 }
41c542ec 3419 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 3420 t.detach();
ca2526f5 3421 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
3422 }
3423 if (g_udrEnabled) {
b78727c6
NC
3424 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3425 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3426 try {
3427 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3428 }
3429 catch (const PDNSException& e) {
3430 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3431 _exit(1);
3432 }
3433 if (!t_udrDBp->init()) {
3434 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3435 _exit(1);
3436 }
3437 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 3438 t.detach();
ca2526f5 3439 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
3440 }
3441}
3442
3443void parseNODWhitelist(const std::string& wlist)
3444{
3445 vector<string> parts;
3446 stringtok(parts, wlist, ",; ");
3447 for(const auto& a : parts) {
3448 g_nodDomainWL.add(DNSName(a));
3449 }
3450}
3451
3452static void setupNODGlobal()
3453{
3454 // Setup NOD subsystem
3455 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3456 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3457 g_nodLog = ::arg().mustDo("new-domain-log");
3458 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3459
3460 // Setup Unique DNS Response subsystem
3461 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3462 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3463}
3464#endif /* NOD_ENABLED */
3465
d187038c 3466static int serviceMain(int argc, char*argv[])
18af64a8 3467{
e6a9dde5
PL
3468 g_log.setName(s_programname);
3469 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3470 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3471
3472 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3473 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3474 if(val >= 0)
e6a9dde5 3475 g_log.setFacility(val);
18af64a8 3476 else
e6a9dde5 3477 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3478 }
3479
ba1a571d 3480 showProductVersion();
3afde9b2 3481
06ea9015 3482 g_disthashseed=dns_random(0xffffffff);
3483
b7ef5828
PL
3484 checkLinuxIPv6Limits();
3485 try {
3486 vector<string> addrs;
3487 if(!::arg()["query-local-address6"].empty()) {
3488 SyncRes::s_doIPv6=true;
e6a9dde5 3489 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3490
3491 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3492 for(const string& addr : addrs) {
3493 g_localQueryAddresses6.push_back(ComboAddress(addr));
3494 }
3495 }
3496 else {
e6a9dde5 3497 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3498 }
3499 addrs.clear();
3500 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3501 for(const string& addr : addrs) {
3502 g_localQueryAddresses4.push_back(ComboAddress(addr));
3503 }
3504 }
3505 catch(std::exception& e) {
e6a9dde5 3506 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3507 exit(99);
3508 }
3509
e48c6b8a
PL
3510 // keep this ABOVE loadRecursorLuaConfig!
3511 if(::arg()["dnssec"]=="off")
3512 g_dnssecmode=DNSSECMode::Off;
3513 else if(::arg()["dnssec"]=="process-no-validate")
3514 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3515 else if(::arg()["dnssec"]=="process")
3516 g_dnssecmode=DNSSECMode::Process;
3517 else if(::arg()["dnssec"]=="validate")
3518 g_dnssecmode=DNSSECMode::ValidateAll;
3519 else if(::arg()["dnssec"]=="log-fail")
3520 g_dnssecmode=DNSSECMode::ValidateForLog;
3521 else {
e6a9dde5 3522 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3523 exit(1);
3524 }
3525
9a3ab3e4
KM
3526 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3527 if (g_signatureInceptionSkew < 0) {
3528 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3529 exit(1);
3530 }
3531
e48c6b8a 3532 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3533 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3534
a6f7f5fe 3535 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3536 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3537
3538 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3539 try {
e6ec15bf 3540 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3541 }
3542 catch (PDNSException &e) {
e6a9dde5 3543 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3544 exit(1);
3545 }
ad42489c 3546
18af64a8 3547 parseACLs();
d6f3fcfa 3548 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 3549
eb5bae86 3550 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3551 vector<string> ips;
3552 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3553 ips.push_back("0.0.0.0");
3554 ips.push_back("::");
c36bc97a 3555
e6a9dde5 3556 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3557 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3558 SyncRes::addDontQuery(*i);
eb5bae86 3559 if(i!=ips.begin())
e6a9dde5
PL
3560 g_log<<Logger::Warning<<", ";
3561 g_log<<Logger::Warning<<*i;
eb5bae86 3562 }
e6a9dde5 3563 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3564 }
3565
f7c1d4e3 3566 g_quiet=::arg().mustDo("quiet");
3ddb9247 3567
b243ca3b 3568 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3569 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3570 if(g_weDistributeQueries) {
b243ca3b 3571 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3572 }
3ddb9247 3573
756e82cf 3574 setupDelegationOnly();
b33c2462 3575 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3576
77499b05
BH
3577 if(::arg()["trace"]=="fail") {
3578 SyncRes::setDefaultLogMode(SyncRes::Store);
3579 }
3580 else if(::arg().mustDo("trace")) {
3581 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3582 ::arg().set("quiet")="no";
3583 g_quiet=false;
3e9c6c0a 3584 g_dnssecLOG=true;
f7c1d4e3 3585 }
43a9b290
PL
3586 string myHostname = getHostname();
3587 if (myHostname == "UNKNOWN"){
3588 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3589 myHostname = "";
d0983bff 3590 }
3ddb9247 3591
aadceba8 3592 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3593
1051f8a9
BH
3594 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3595
f7c1d4e3 3596 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
63637fd8 3597 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3598 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3599 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3600 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3601 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3602 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3603 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3604 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3605 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3606 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3607 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3608 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3609 if(SyncRes::s_serverID.empty()) {
d0983bff 3610 SyncRes::s_serverID = myHostname;
f7c1d4e3 3611 }
3ddb9247 3612
e9f9b8ec
RG
3613 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3614 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3615
8a3a3822
RG
3616 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3617 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3618 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3619 }
3620 else {
3621 bool found = false;
3622 for (const auto& addr : g_localQueryAddresses4) {
3623 if (!IsAnyAddress(addr)) {
3624 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3625 found = true;
3626 break;
3627 }
3628 }
3629 if (!found) {
3630 for (const auto& addr : g_localQueryAddresses6) {
3631 if (!IsAnyAddress(addr)) {
3632 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3633 found = true;
3634 break;
3635 }
3636 }
3637 if (!found) {
3638 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3639 }
3640 }
3641 }
3642
2fe3354d
CH
3643 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3644 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3645 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3646
5cc8371b 3647 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3648 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3649
5b0ddd18 3650 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3651
49a699c4 3652 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3653
08f3f638 3654 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3655
f7c1d4e3 3656 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3657 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3658
3659 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3660 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3661
b3adda56
PD
3662 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3663
b243ca3b 3664 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3665 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3666 if (g_numWorkerThreads < 1) {
e6a9dde5 3667 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3668 g_numWorkerThreads = 1;
3669 }
3670
b243ca3b 3671 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3672 g_maxMThreads = ::arg().asNum("max-mthreads");
3673
00b8cadc
RG
3674 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3675
0ec489bf 3676 g_statisticsInterval = ::arg().asNum("statistics-interval");
3677
810ff705
RG
3678#ifdef SO_REUSEPORT
3679 g_reusePort = ::arg().mustDo("reuseport");
3680#endif
3681
b243ca3b 3682 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3683
b243ca3b
RG
3684 if (g_reusePort) {
3685 if (g_weDistributeQueries) {
3686 /* first thread is the handler, then distributors */
3687 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3688 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3689 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3690 makeUDPServerSockets(deferredAdds);
adb6cd72 3691 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3692 }
3693 }
3694 else {
3695 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3696 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3697 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3698 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3699 makeUDPServerSockets(deferredAdds);
adb6cd72 3700 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3701 }
810ff705
RG
3702 }
3703 }
3704 else {
c47f201b 3705 std::set<int> tcpSockets;
b243ca3b
RG
3706 /* we don't have reuseport so we can only open one socket per
3707 listening addr:port and everyone will listen on it */
3708 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3709 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3710
3711 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3712 needs to listen to the shared sockets */
3713 if (g_weDistributeQueries) {
3714 /* first thread is the handler, then distributors */
3715 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3716 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3717 }
3718 }
3719 else {
3720 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3721 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3722 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3723 }
3724 }
810ff705 3725 }
815099b2 3726
af1377b7
NC
3727#ifdef NOD_ENABLED
3728 // Setup newly observed domain globals
3729 setupNODGlobal();
3730#endif /* NOD_ENABLED */
3731
677e2a46
BH
3732 int forks;
3733 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3734 if(!fork()) // we are child
3735 break;
3736 }
3ddb9247 3737
f7c1d4e3 3738 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3739 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3740 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
3741 daemonize();
3742 }
3743 signal(SIGUSR1,usr1Handler);
3744 signal(SIGUSR2,usr2Handler);
3745 signal(SIGPIPE,SIG_IGN);
810ff705 3746
a6414fdc 3747 checkOrFixFDS();
3ddb9247 3748
d1b28475
KM
3749#ifdef HAVE_LIBSODIUM
3750 if (sodium_init() == -1) {
e6a9dde5 3751 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
3752 exit(99);
3753 }
3754#endif
3755
3afde9b2
PL
3756 openssl_thread_setup();
3757 openssl_seed();
e97cb679
AT
3758 /* setup rng before chroot */
3759 dns_random_init();
3afde9b2 3760
bdbb07e0 3761 if(::arg()["server-id"].empty()) {
d0983bff 3762 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
3763 }
3764
138435cb
BH
3765 int newgid=0;
3766 if(!::arg()["setgid"].empty())
3767 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3768 int newuid=0;
3769 if(!::arg()["setuid"].empty())
3770 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3771
f1d6a7ce
KM
3772 Utility::dropGroupPrivs(newuid, newgid);
3773
138435cb 3774 if (!::arg()["chroot"].empty()) {
75336810
PL
3775#ifdef HAVE_SYSTEMD
3776 char *ns;
3777 ns = getenv("NOTIFY_SOCKET");
3778 if (ns != nullptr) {
e6a9dde5 3779 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
3780 exit(1);
3781 }
3782#endif
138435cb 3783 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 3784 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
3785 exit(1);
3786 }
f0f3f0b0 3787 else
377602e3 3788 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
3789 }
3790
f0f3f0b0
PL
3791 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3792 if(!s_pidfname.empty())
3793 unlink(s_pidfname.c_str()); // remove possible old pid file
3794 writePid();
3795
3796 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3797
f1d6a7ce 3798 Utility::dropUserPrivs(newuid);
1f2b341e
RG
3799 try {
3800 /* we might still have capabilities remaining, for example if we have been started as root
3801 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
3802 like CAP_NET_BIND_SERVICE.
3803 */
3804 dropCapabilities();
3805 }
3806 catch(const std::exception& e) {
3807 g_log<<Logger::Warning<<e.what()<<endl;
3808 }
c0063e60 3809
e6ec15bf
RG
3810 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
3811
49a699c4 3812 makeThreadPipes();
3ddb9247 3813
5d4dd7fe
BH
3814 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3815 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 3816 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 3817 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 3818
d705aad9
RG
3819 if (::arg().mustDo("snmp-agent")) {
3820 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3821 g_snmpAgent->run();
3822 }
3823
b47026fd 3824 int port = ::arg().asNum("udp-source-port-min");
58da9034 3825 if(port < 1024 || port > 65535){
e6a9dde5 3826 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
3827 exit(99); // this isn't going to fix itself either
3828 }
3829 s_minUdpSourcePort = port;
b47026fd 3830 port = ::arg().asNum("udp-source-port-max");
58da9034 3831 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 3832 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
3833 exit(99); // this isn't going to fix itself either
3834 }
3835 s_maxUdpSourcePort = port;
3836 std::vector<string> parts {};
b47026fd 3837 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
3838 for (const auto &part : parts)
3839 {
3840 port = std::stoi(part);
58da9034 3841 if(port < 1024 || port > 65535){
e6a9dde5 3842 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
3843 exit(99); // this isn't going to fix itself either
3844 }
3845 s_avoidUdpSourcePorts.insert(port);
3846 }
3847
b243ca3b 3848 unsigned int currentThreadId = 1;
8fd25133 3849 const auto cpusMap = parseCPUMap();
d77abca1 3850
c3828c03 3851 if(g_numThreads == 1) {
e6a9dde5 3852 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
3853#ifdef HAVE_SYSTEMD
3854 sd_notify(0, "READY=1");
3855#endif
b243ca3b
RG
3856
3857 /* This thread handles the web server, carbon, statistics and the control channel */
3858 auto& handlerInfos = s_threadInfos.at(0);
3859 handlerInfos.isHandler = true;
c390b2da 3860 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
3861
3862 setCPUMap(cpusMap, currentThreadId, pthread_self());
3863
3864 auto& infos = s_threadInfos.at(currentThreadId);
3865 infos.isListener = true;
3866 infos.isWorker = true;
c390b2da 3867 recursorThread(currentThreadId++, "worker");
76698c6e
BH
3868 }
3869 else {
8fd25133 3870
b243ca3b
RG
3871 if (g_weDistributeQueries) {
3872 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
3873 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
3874 auto& infos = s_threadInfos.at(currentThreadId);
3875 infos.isListener = true;
c390b2da 3876 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
3877
3878 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
3879 }
3880 }
8fd25133 3881
62b549e0
RG
3882 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
3883
b243ca3b
RG
3884 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
3885 auto& infos = s_threadInfos.at(currentThreadId);
3886 infos.isListener = g_weDistributeQueries ? false : true;
3887 infos.isWorker = true;
c390b2da 3888 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b
RG
3889
3890 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 3891 }
b243ca3b 3892
6b6720de
PL
3893#ifdef HAVE_SYSTEMD
3894 sd_notify(0, "READY=1");
3895#endif
b243ca3b
RG
3896
3897 /* This thread handles the web server, carbon, statistics and the control channel */
3898 auto& infos = s_threadInfos.at(0);
3899 infos.isHandler = true;
c390b2da 3900 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b
RG
3901
3902 s_threadInfos.at(0).thread.join();
bb4bdbaf 3903 }
bb4bdbaf
BH
3904 return 0;
3905}
3906
c390b2da 3907static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
3908try
3909{
d77abca1 3910 t_id=n;
b243ca3b 3911 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
3912
3913 static string threadPrefix = "pdns-r/";
519f5484 3914 setThreadName(threadPrefix + threadName);
c390b2da 3915
49a699c4 3916 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 3917 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 3918 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
3919 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3920 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 3921 primeHints();
3ddb9247 3922
f26bf547 3923 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 3924
aa7929a3 3925#ifdef HAVE_PROTOBUF
f26bf547 3926 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
aa7929a3 3927#endif
e6a9dde5 3928 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 3929
af1377b7 3930#ifdef NOD_ENABLED
41c542ec
NC
3931 if (threadInfo.isWorker)
3932 setupNODThread();
af1377b7
NC
3933#endif /* NOD_ENABLED */
3934
8fb594ba 3935 if(threadInfo.isWorker) {
5b388d28
PD
3936 try {
3937 if(!::arg()["lua-dns-script"].empty()) {
3938 t_pdl = std::make_shared<RecursorLua4>();
3939 t_pdl->loadFile(::arg()["lua-dns-script"]);
3940 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3941 }
3942 }
3943 catch(std::exception &e) {
3944 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3945 _exit(99);
674cf0f6 3946 }
674cf0f6 3947 }
3ddb9247 3948
f8f243b0 3949 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 3950 if(ringsize) {
f26bf547 3951 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
3952 if(g_weDistributeQueries)
3953 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 3954 else
3ddb9247 3955 t_remotes->set_capacity(ringsize);
f26bf547 3956 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3957 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
3958 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3959 t_bogusremotes->set_capacity(ringsize);
f26bf547 3960 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3961 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 3962 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3963 t_timeouts->set_capacity(ringsize);
92011b8f 3964
f26bf547 3965 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3966 t_queryring->set_capacity(ringsize);
f26bf547 3967 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3968 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
3969 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3970 t_bogusqueryring->set_capacity(ringsize);
92011b8f 3971 }
3ddb9247 3972
f26bf547 3973 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3ddb9247 3974
63341e8d
RG
3975#ifdef HAVE_PROTOBUF
3976 /* start protobuf export threads if needed */
3977 auto luaconfsLocal = g_luaconfs.getLocal();
3978 checkProtobufExport(luaconfsLocal);
3979 checkOutgoingProtobufExport(luaconfsLocal);
3980#endif /* HAVE_PROTOBUF */
3981
bb4bdbaf
BH
3982 PacketID pident;
3983
3984 t_fdm=getMultiplexer();
d77abca1 3985
b243ca3b 3986 if(threadInfo.isHandler) {
d07bf7ff 3987 if(::arg().mustDo("webserver")) {
e6a9dde5 3988 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 3989 try {
1ce57618 3990 new RecursorWebServer(t_fdm);
8989097d
CH
3991 }
3992 catch(PDNSException &e) {
e6a9dde5 3993 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
3994 exit(99);
3995 }
f3d1d67b 3996 }
377602e3 3997 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 3998 }
810ff705 3999 else {
d77abca1 4000
b243ca3b
RG
4001 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4002 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4003
4004 if (threadInfo.isListener) {
4005 if (g_reusePort) {
4006 /* then every listener has its own FDs */
4007 for(const auto deferred : threadInfo.deferredAdds) {
4008 t_fdm->addReadFD(deferred.first, deferred.second);
4009 }
810ff705 4010 }
b243ca3b
RG
4011 else {
4012 /* otherwise all listeners are listening on the same ones */
4013 for(const auto deferred : g_deferredAdds) {
4014 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4015 }
4016 }
4017 }
810ff705 4018 }
3ddb9247 4019
b0b37121 4020 registerAllStats();
d77abca1 4021
b243ca3b 4022 if(threadInfo.isHandler) {
674cf0f6
BH
4023 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4024 }
1bc3c142 4025
f7c1d4e3 4026 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4027
f7c1d4e3 4028 bool listenOnTCP(true);
49a699c4 4029
cb1523d1 4030 time_t last_stat = 0;
a2f87dd1 4031 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4032 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4033 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 4034 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 4035 for(;;) {
ac0e821b 4036 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4037
3427fa8a
BH
4038 if(!(counter%500)) {
4039 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4040 }
4041
d2392145 4042 if(!(counter%55)) {
d8f6d49f 4043 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4044 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4045
f7c1d4e3 4046 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 4047 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4048 if(g_logCommonErrors)
e6a9dde5 4049 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4050 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4051 }
4052 }
3ddb9247 4053
f7c1d4e3
BH
4054 counter++;
4055
b243ca3b 4056 if(threadInfo.isHandler) {
cb1523d1
RG
4057 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4058 doStats();
4059 last_stat = g_now.tv_sec;
4060 }
f7c1d4e3 4061
cb1523d1 4062 Utility::gettimeofday(&g_now, 0);
2c78bd57 4063
cb1523d1
RG
4064 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4065 MT->makeThread(doCarbonDump, 0);
4066 last_carbon = g_now.tv_sec;
4067 }
2c78bd57 4068 }
2a0276a9 4069 if (t_pdl != nullptr) {
9adbe790 4070 // lua-dns-script directive is present, call the maintenance callback if needed
b243ca3b 4071 if (threadInfo.isWorker) {
2a0276a9
CHB
4072 // Only on threads processing queries
4073 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4074 t_pdl->maintenance();
4075 last_lua_maintenance = g_now.tv_sec;
4076 }
9adbe790 4077 }
a2f87dd1 4078 }
2c78bd57 4079
bb4bdbaf 4080 t_fdm->run(&g_now);
3ea54bf0 4081 // 'run' updates g_now for us
f7c1d4e3 4082
b243ca3b 4083 if(threadInfo.isListener) {
5c889cf5 4084 if(listenOnTCP) {
c47f201b
RG
4085 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4086 for(const auto fd : threadInfo.tcpSockets) {
4087 t_fdm->removeReadFD(fd);
b243ca3b 4088 }
c47f201b
RG
4089 listenOnTCP=false;
4090 }
f7c1d4e3 4091 }
5c889cf5 4092 else {
c47f201b
RG
4093 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4094 for(const auto fd : threadInfo.tcpSockets) {
4095 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4096 }
c47f201b
RG
4097 listenOnTCP=true;
4098 }
f7c1d4e3
BH
4099 }
4100 }
4101 }
4102}
3f81d239 4103catch(PDNSException &ae) {
e6a9dde5 4104 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4105 return 0;
4106}
4107catch(std::exception &e) {
e6a9dde5 4108 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4109 return 0;
4110}
4111catch(...) {
e6a9dde5 4112 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4113 return 0;
4114}
4115
51e2144e 4116
3ddb9247 4117int main(int argc, char **argv)
288f4aa9 4118{
dbd23fc2
BH
4119 g_argc = argc;
4120 g_argv = argv;
5e3de507 4121 g_stats.startupTime=time(0);
3e135495 4122 versionSetProduct(ProductRecursor);
8a63d3ce 4123 reportBasicTypes();
0007c2e5 4124 reportOtherTypes();
ea634573 4125
22030c37 4126 int ret = EXIT_SUCCESS;
caa6eefa 4127
288f4aa9 4128 try {
f888311c 4129 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4130 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4131 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4132 ::arg().set("local-port","port to listen on")="53";
32252594 4133 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4134 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4135 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4136 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4137 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4138 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4139 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4140 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4141 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4142 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4143 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4144 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
4145 ::arg().set("chroot","switch to chroot jail")="";
4146 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4147 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 4148 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4149 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4150 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4151 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4152 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4153 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4154 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4155 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4156 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4157 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4158 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4159 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
cc08b5a9 4160 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4161 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4162 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4163 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4164 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4165
0ec489bf 4166 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4167 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4168 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4169 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4170 ::arg().set("socket-owner","Owner of socket")="";
4171 ::arg().set("socket-group","Group of socket")="";
4172 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4173
f0f3f0b0 4174 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
4175 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4176 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4177 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4178 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4179 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4180 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 4181 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4182 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 4183 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4184 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4185 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 4186 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4187 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4188 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4189 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4190 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4191 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4192 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4193 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4194 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4195 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4196 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4197 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4198 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4199 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4200 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4201 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4202 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 4203
5605c067 4204 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4205 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4206 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4207 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4208 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4209 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4210 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4211 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4212 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4213 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4214 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4215 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4216 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3f975863 4217 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4218 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4219 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4220 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4221 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4222 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4223 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4224 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4225 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
4226 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4227 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 4228 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4229 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4230 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4231 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4232 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
a09a8ce0 4233
68e6df3c 4234 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4235 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4236
4237 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4238
d705aad9 4239 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4240 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4241
0735b17e 4242 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4243 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4244
8fd25133
RG
4245 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4246
98d36505
RG
4247 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4248
5cc8371b 4249 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4250 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4251
58da9034 4252 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4253 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4254 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4255 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 4256 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
af1377b7
NC
4257#ifdef NOD_ENABLED
4258 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4259 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4260 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4261 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4262 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4263 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4264 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
4265 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4266 ::arg().set("unique-response-log", "Log unique responses")="yes";
4267 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4268 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4269 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 4270#endif /* NOD_ENABLED */
2e3d8a19 4271 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4272 ::arg().setCmd("version","Print version string");
d5141417 4273 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 4274 g_log.toConsole(Logger::Info);
2e3d8a19 4275 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4276
2d733c0f
CH
4277 string configname=::arg()["config-dir"]+"/recursor.conf";
4278 if(::arg()["config-name"]!="") {
4279 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4280 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4281 }
4282 cleanSlashes(configname);
5124de27 4283
5cc1ea1d
CH
4284 if(!::arg().getCommands().empty()) {
4285 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4286 exit(99);
4287 }
4288
577cf284
BH
4289 if(::arg().mustDo("config")) {
4290 cout<<::arg().configstring()<<endl;
4291 exit(0);
4292 }
4293
3ddb9247 4294 if(!::arg().file(configname.c_str()))
e6a9dde5 4295 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4296
2e3d8a19 4297 ::arg().parse(argc,argv);
c836dc19 4298
2054afbb
CH
4299 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4300 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4301 exit(EXIT_FAILURE);
4302 }
4303
4304 if (::arg()["socket-dir"].empty()) {
4305 if (::arg()["chroot"].empty())
4306 ::arg().set("socket-dir") = LOCALSTATEDIR;
4307 else
4308 ::arg().set("socket-dir") = "/";
4309 }
4310
2e3d8a19 4311 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4312
b243ca3b
RG
4313 if(::arg().asNum("threads")==1) {
4314 if (::arg().mustDo("pdns-distributes-queries")) {
4315 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4316 ::arg().set("pdns-distributes-queries")="no";
4317 }
4318 }
4319
4320 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4321 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4322 ::arg().set("distributor-threads")="1";
4323 }
4324
4325 if (!::arg().mustDo("pdns-distributes-queries")) {
4326 ::arg().set("distributor-threads")="0";
4327 }
61d74169 4328
2e3d8a19 4329 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4330 cout<<"syntax:"<<endl<<endl;
4331 cout<<::arg().helpstring(::arg()["help"])<<endl;
4332 exit(0);
b636533b 4333 }
5e3de507 4334 if(::arg().mustDo("version")) {
ba1a571d 4335 showProductVersion();
3613a51c 4336 showBuildConfiguration();
67076869 4337 exit(0);
5e3de507 4338 }
b636533b 4339
34162f8f 4340 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4341
34162f8f
CH
4342 if (logUrgency < Logger::Error)
4343 logUrgency = Logger::Error;
f48d7b65 4344 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4345 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4346 }
e6a9dde5
PL
4347 g_log.setLoglevel(logUrgency);
4348 g_log.toConsole(logUrgency);
34162f8f 4349
f7c1d4e3 4350 serviceMain(argc, argv);
288f4aa9 4351 }
3f81d239 4352 catch(PDNSException &ae) {
e6a9dde5 4353 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4354 ret=EXIT_FAILURE;
288f4aa9 4355 }
fdbf35ac 4356 catch(std::exception &e) {
e6a9dde5 4357 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4358 ret=EXIT_FAILURE;
288f4aa9
BH
4359 }
4360 catch(...) {
e6a9dde5 4361 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4362 ret=EXIT_FAILURE;
288f4aa9 4363 }
3ddb9247 4364
22030c37 4365 return ret;
288f4aa9 4366}