]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Set socket-dir to RUNTIME_DIRECTORY under systemd.
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
5216ddcc 91#include "proxy-protocol.hh"
d6f3fcfa 92#include "pubsuffix.hh"
bbec1961 93#include "shuffle.hh"
af1377b7
NC
94#ifdef NOD_ENABLED
95#include "nod.hh"
96#endif /* NOD_ENABLED */
f3c18728 97
d9d3f9c1 98#include "rec-protobuf.hh"
d705aad9 99#include "rec-snmp.hh"
aa7929a3 100
6b6720de
PL
101#ifdef HAVE_SYSTEMD
102#include <systemd/sd-daemon.h>
103#endif
104
d187038c
RG
105#include "namespaces.hh"
106
d61aa945
RG
107#ifdef HAVE_PROTOBUF
108#include "uuid-utils.hh"
b9fa43e0 109#endif /* HAVE_PROTOBUF */
d61aa945 110
5cc8371b
RG
111#include "xpf.hh"
112
d187038c
RG
113typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
114
f26bf547 115static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 116static thread_local unsigned int t_id = 0;
f26bf547
RG
117static thread_local std::shared_ptr<Regex> t_traceRegex;
118static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 119#ifdef HAVE_PROTOBUF
3fe06137 120static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 121static thread_local uint64_t t_protobufServersGeneration;
3fe06137 122static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 123static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 124#endif /* HAVE_PROTOBUF */
f26bf547 125
b9fa43e0 126#ifdef HAVE_FSTRM
10ba6d01 127static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
b9fa43e0
OM
128static thread_local uint64_t t_frameStreamServersGeneration;
129#endif /* HAVE_FSTRM */
130
f26bf547 131thread_local std::unique_ptr<MT_t> MT; // the big MTasker
7ce9aad6 132std::unique_ptr<MemRecursorCache> s_RC;
a7956123
OM
133
134
f26bf547 135thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 136thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 137thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 138thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 139thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
140#ifdef NOD_ENABLED
141thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 142thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 143#endif /* NOD_ENABLED */
d187038c 144__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 145
b243ca3b
RG
146typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
147
d7dae798 148// for communicating with our threads
b243ca3b
RG
149// effectively readonly after startup
150struct RecThreadInfo
151{
152 struct ThreadPipeSet
153 {
154 int writeToThread{-1};
155 int readToThread{-1};
156 int writeFromThread{-1};
157 int readFromThread{-1};
158 int writeQueriesToThread{-1}; // this one is non-blocking
159 int readQueriesToThread{-1};
160 };
161
adb6cd72 162 /* FD corresponding to TCP sockets this thread is listening
c47f201b 163 on.
adb6cd72
RG
164 These FDs are also in deferredAdds when we have one
165 socket per listener, and in g_deferredAdds instead. */
166 std::set<int> tcpSockets;
b243ca3b
RG
167 /* FD corresponding to listening sockets if we have one socket per
168 listener (with reuseport), otherwise all listeners share the
169 same FD and g_deferredAdds is then used instead */
170 deferredAdd_t deferredAdds;
171 struct ThreadPipeSet pipes;
172 std::thread thread;
144040be
RG
173 MT_t* mt{nullptr};
174 uint64_t numberOfDistributedQueries{0};
b243ca3b
RG
175 /* handle the web server, carbon, statistics and the control channel */
176 bool isHandler{false};
177 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
178 bool isListener{false};
179 /* process queries */
180 bool isWorker{false};
49a699c4 181};
810ff705 182
b243ca3b
RG
183/* first we have the handler thread, t_id == 0 (some other
184 helper threads like SNMP might have t_id == 0 as well)
185 then the distributor threads if any
186 and finally the workers */
187static std::vector<RecThreadInfo> s_threadInfos;
188/* without reuseport, all listeners share the same sockets */
189static deferredAdd_t g_deferredAdds;
faf580f5 190
d187038c
RG
191typedef vector<int> tcpListenSockets_t;
192typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 193
d187038c 194static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 195static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
196static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
197static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
198static AtomicCounter counter;
9065eb05 199static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 200static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 201static NetmaskGroup g_XPFAcl;
5216ddcc 202static NetmaskGroup g_proxyProtocolACL;
ef3ee606
RG
203static boost::optional<ComboAddress> g_dns64Prefix{boost::none};
204static DNSName g_dns64PrefixReverse;
5216ddcc 205static size_t g_proxyProtocolMaximumSize;
d187038c 206static size_t g_tcpMaxQueriesPerConn;
a5886e6a 207static size_t s_maxUDPQueriesPerRound;
d187038c
RG
208static uint64_t g_latencyStatSize;
209static uint32_t g_disthashseed;
210static unsigned int g_maxTCPPerClient;
d187038c 211static unsigned int g_maxMThreads;
b243ca3b 212static unsigned int g_numDistributorThreads;
d187038c
RG
213static unsigned int g_numWorkerThreads;
214static int g_tcpTimeout;
215static uint16_t g_udpTruncationThreshold;
59cb4a79 216static uint16_t g_xpfRRCode{0};
d187038c
RG
217static std::atomic<bool> statsWanted;
218static std::atomic<bool> g_quiet;
219static bool g_logCommonErrors;
220static bool g_anyToTcp;
b243ca3b 221static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 222static bool g_reusePort{false};
00b8cadc 223static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 224static time_t g_statisticsInterval;
9065eb05 225static bool g_useIncomingECS;
c29d820c 226static bool g_useKernelTimestamp;
a6f7f5fe 227std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
228#ifdef NOD_ENABLED
229static bool g_nodEnabled;
230static DNSName g_nodLookupDomain;
231static bool g_nodLog;
232static SuffixMatchNode g_nodDomainWL;
ca2526f5 233static std::string g_nod_pbtag;
41c542ec
NC
234static bool g_udrEnabled;
235static bool g_udrLog;
ca2526f5 236static std::string g_udr_pbtag;
af1377b7 237#endif /* NOD_ENABLED */
f097141b 238#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 239static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
240#else
241static std::set<uint16_t> s_avoidUdpSourcePorts;
242#endif
bf6f28ca
CHB
243static uint16_t s_minUdpSourcePort;
244static uint16_t s_maxUdpSourcePort;
144040be 245static double s_balancingFactor;
49a699c4 246
b243ca3b 247RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 248RecursorStats g_stats;
2d733c0f 249string s_programname="pdns_recursor";
d187038c 250string s_pidfname;
c1c29961 251bool g_lowercaseOutgoing;
bf19ccfd 252unsigned int g_networkTimeoutMsec;
d187038c
RG
253unsigned int g_numThreads;
254uint16_t g_outgoingEDNSBufsize;
98d36505 255bool g_logRPZChanges{false};
c3828c03 256
559b6c93
PL
257// Used in the Syncres to not throttle certain servers
258GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
259GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
260
12cd44ee 261#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 262#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 263// Bad Nets taken from both:
3ddb9247 264// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 265// and
266// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
267// where such a network may not be considered a valid destination
268#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
269#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 270
d7dae798 271//! used to send information to a newborn mthread
ea634573 272struct DNSComboWriter {
08b02366 273 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
274 {
275 }
5cc8371b 276
b502d522 277 DNSComboWriter(const std::string& query, const struct timeval& now, std::unordered_set<std::string>&& policyTags, LuaContext::LuaObject&& data, std::vector<DNSRecord>&& records): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_records(std::move(records)), d_data(std::move(data))
5164bac3
RG
278 {
279 }
280
5cc8371b
RG
281 void setRemote(const ComboAddress& sa)
282 {
283 d_remote=sa;
284 }
285
286 void setSource(const ComboAddress& sa)
ea634573 287 {
5cc8371b 288 d_source=sa;
ea634573
BH
289 }
290
b71b60ee 291 void setLocal(const ComboAddress& sa)
292 {
293 d_local=sa;
294 }
295
5cc8371b
RG
296 void setDestination(const ComboAddress& sa)
297 {
298 d_destination=sa;
299 }
b71b60ee 300
ea634573
BH
301 void setSocket(int sock)
302 {
303 d_socket=sock;
304 }
a1754c6a
BH
305
306 string getRemote() const
307 {
5cc8371b
RG
308 if (d_source == d_remote) {
309 return d_source.toStringWithPort();
310 }
311 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
312 }
313
5216ddcc 314 std::vector<ProxyProtocolValue> d_proxyProtocolValues;
5cc8371b 315 MOADNSParser d_mdp;
c9e9e5e0 316 struct timeval d_now;
5cc8371b
RG
317 /* Remote client, might differ from d_source
318 in case of XPF, in which case d_source holds
319 the IP of the client and d_remote of the proxy
320 */
321 ComboAddress d_remote;
322 ComboAddress d_source;
323 /* Destination address, might differ from
324 d_destination in case of XPF, in which case
325 d_destination holds the IP of the proxy and
326 d_local holds our own. */
327 ComboAddress d_local;
328 ComboAddress d_destination;
aa7929a3
RG
329#ifdef HAVE_PROTOBUF
330 boost::uuids::uuid d_uuid;
67e31ebe 331 string d_requestorId;
590388d2 332 string d_deviceId;
0a6a45c8 333 string d_deviceName;
c29d820c 334 struct timeval d_kernelTimestamp{0,0};
aa7929a3 335#endif
08b02366 336 std::string d_query;
b502d522 337 std::unordered_set<std::string> d_policyTags;
163ed916 338 std::string d_routingTag;
37a919d4 339 std::vector<DNSRecord> d_records;
5164bac3 340 LuaContext::LuaObject d_data;
b40562da 341 EDNSSubnetOpts d_ednssubnet;
5164bac3 342 shared_ptr<TCPConnection> d_tcpConnection;
37a919d4 343 boost::optional<int> d_rcode{boost::none};
e53b77e2 344 int d_socket{-1};
b673817a 345 unsigned int d_tag{0};
e9f63d47 346 uint32_t d_qhash{0};
70fb28d9 347 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
348 uint16_t d_ecsBegin{0};
349 uint16_t d_ecsEnd{0};
70fb28d9 350 bool d_variable{false};
5164bac3
RG
351 bool d_ecsFound{false};
352 bool d_ecsParsed{false};
37a919d4 353 bool d_followCNAMERecords{false};
406b722e 354 bool d_logResponse{false};
a73da04b 355 bool d_tcp{false};
ea634573
BH
356};
357
06857845
RG
358MT_t* getMT()
359{
360 return MT ? MT.get() : nullptr;
361}
ea634573 362
288f4aa9
BH
363ArgvMap &arg()
364{
365 static ArgvMap theArg;
366 return theArg;
367}
4ef015cd 368
8fb594ba 369unsigned int getRecursorThreadId()
b4015453 370{
30da2030 371 return t_id;
b4015453 372}
09e6702a 373
b243ca3b
RG
374static bool isDistributorThread()
375{
376 if (t_id == 0) {
377 return false;
378 }
379
380 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
381}
382
383static bool isHandlerThread()
384{
385 if (t_id == 0) {
386 return true;
387 }
388
389 return s_threadInfos.at(t_id).isHandler;
390}
391
d187038c 392static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 393
50c81227 394// -1 is error, 0 is timeout, 1 is success
3ddb9247 395int asendtcp(const string& data, Socket* sock)
5c633640
BH
396{
397 PacketID pident;
398 pident.sock=sock;
399 pident.outMSG=data;
3ddb9247 400
bb4bdbaf 401 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 402 string packet;
5c633640 403
5b0ddd18 404 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 405
9170fbaf 406 if(!ret || ret==-1) { // timeout
bb4bdbaf 407 t_fdm->removeWriteFD(sock->getHandle());
5c633640 408 }
50c81227
BH
409 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
410 return -1;
411 }
9170fbaf 412 return ret;
5c633640
BH
413}
414
d187038c 415static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 416
9170fbaf 417// -1 is error, 0 is timeout, 1 is success
a683e8bd 418int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 419{
50c81227 420 data.clear();
5c633640
BH
421 PacketID pident;
422 pident.sock=sock;
423 pident.inNeeded=len;
825fa717 424 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 425 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 426
bb4bdbaf 427 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 428 if(!ret || ret==-1) { // timeout
bb4bdbaf 429 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 430 }
50c81227
BH
431 else if(data.empty()) {// error, EOF or other
432 return -1;
433 }
434
9170fbaf 435 return ret;
288f4aa9
BH
436}
437
d187038c 438static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 439{
fba1e944 440 PacketID pident=*any_cast<PacketID>(&var);
4465e941 441 char resp[512];
7c77ce63
RG
442 ComboAddress fromaddr;
443 socklen_t addrlen=sizeof(fromaddr);
444
445 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
446 if (fromaddr != pident.remote) {
e6a9dde5 447 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
448
449 }
450
4465e941 451 t_fdm->removeReadFD(fd);
452 if(ret >= 0) {
a683e8bd 453 string data(resp, (size_t) ret);
fba1e944 454 MT->sendEvent(pident, &data);
4465e941 455 }
456 else {
fba1e944 457 string empty;
458 MT->sendEvent(pident, &empty);
04360367 459 // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
4465e941 460 }
461}
fba1e944 462string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 463{
4465e941 464 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
465 s.setNonBlocking();
466 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
467
468 s.bind(local);
469 s.connect(dest);
4465e941 470 s.send(query);
471
472 PacketID pident;
473 pident.sock=&s;
7c77ce63 474 pident.remote=dest;
4465e941 475 pident.type=0;
fba1e944 476 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 477
478 string data;
fba1e944 479
4465e941 480 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 481
4465e941 482 if(!ret || ret==-1) { // timeout
4465e941 483 t_fdm->removeReadFD(s.getHandle());
484 }
485 else if(data.empty()) {// error, EOF or other
fba1e944 486 // we could special case this
4465e941 487 return data;
488 }
4465e941 489 return data;
490}
491
d7dae798 492//! pick a random query local address
1652a63e 493ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 494{
1652a63e 495 ComboAddress ret;
5a38281c 496 if(family==AF_INET) {
3ddb9247 497 if(g_localQueryAddresses4.empty())
1652a63e 498 ret = g_local4;
3ddb9247 499 else
1652a63e
BH
500 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
501 ret.sin4.sin_port = htons(port);
5a38281c
BH
502 }
503 else {
504 if(g_localQueryAddresses6.empty())
1652a63e
BH
505 ret = g_local6;
506 else
507 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 508
1652a63e 509 ret.sin6.sin6_port = htons(port);
5a38281c 510 }
1652a63e 511 return ret;
5a38281c 512}
4ef015cd 513
d187038c 514static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 515
d187038c 516static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
517{
518 uint32_t psize=0;
519 socklen_t len=sizeof(psize);
3ddb9247 520
d7dae798 521 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 522 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 523 return;
d7dae798
BH
524 }
525
a2a81d42
OM
526 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0) {
527 int err = errno;
296ddbfe 528 g_log << Logger::Error << "Unable to raise socket buffer size to " << size << ": " << stringerror(err) << endl;
a2a81d42 529 }
d7dae798
BH
530}
531
532
533static void setSocketReceiveBuffer(int fd, uint32_t size)
534{
535 setSocketBuffer(fd, SO_RCVBUF, size);
536}
537
538static void setSocketSendBuffer(int fd, uint32_t size)
539{
540 setSocketBuffer(fd, SO_SNDBUF, size);
541}
542
543
4ef015cd
BH
544// you can ask this class for a UDP socket to send a query from
545// this socket is not yours, don't even think about deleting it
546// but after you call 'returnSocket' on it, don't assume anything anymore
547class UDPClientSocks
548{
4ef015cd 549 unsigned int d_numsocks;
4ef015cd 550public:
e2642526 551 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
552 {
553 }
554
2ee280cf 555 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 556 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 557 {
d8f6d49f
BH
558 *fd=makeClientSocket(toaddr.sin4.sin_family);
559 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 560 return -2;
d8f6d49f
BH
561
562 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
563 int err = errno;
a7b68ae7
RG
564 try {
565 closesocket(*fd);
566 }
567 catch(const PDNSException& e) {
e6a9dde5 568 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
569 }
570
d8f6d49f 571 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 572 return -2;
998a4334 573 return -1;
d8f6d49f 574 }
998a4334 575
998a4334 576 d_numsocks++;
d8f6d49f 577 return 0;
4ef015cd
BH
578 }
579
580 // return a socket to the pool, or simply erase it
2bee9b7c 581 void returnSocket(int fd)
4ef015cd 582 {
80baf329 583 try {
2bee9b7c 584 t_fdm->removeReadFD(fd);
80baf329 585 }
2bee9b7c 586 catch(const FDMultiplexerException& e) {
bb4bdbaf 587 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 588 }
2bee9b7c 589
a7b68ae7 590 try {
2bee9b7c 591 closesocket(fd);
a7b68ae7
RG
592 }
593 catch(const PDNSException& e) {
e6a9dde5 594 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 595 }
3ddb9247 596
998a4334 597 --d_numsocks;
4ef015cd 598 }
d8f6d49f 599
2bee9b7c
RG
600private:
601
d8f6d49f 602 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 603 static int makeClientSocket(int family)
d8f6d49f 604 {
a683e8bd 605 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 606
d8f6d49f
BH
607 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
608 return ret;
3ddb9247
PD
609
610 if(ret<0)
335da0ba 611 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 612
7eb73ffa 613 // setCloseOnExec(ret); // we're not going to exec
5a38281c 614
d8f6d49f 615 int tries=10;
3aa91c3e 616 ComboAddress sin;
d8f6d49f 617 while(--tries) {
1652a63e 618 uint16_t port;
3ddb9247 619
d8f6d49f 620 if(tries==1) // fall back to kernel 'random'
4957a608 621 port = 0;
bf6f28ca
CHB
622 else {
623 do {
624 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
625 }
626 while (s_avoidUdpSourcePorts.count(port));
627 }
5a38281c 628
3aa91c3e 629 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 630
3ddb9247 631 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 632 break;
d8f6d49f 633 }
9ec48f21
RG
634
635 if(!tries) {
636 closesocket(ret);
3aa91c3e 637 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
9ec48f21
RG
638 }
639
640 try {
641 setReceiveSocketErrors(ret, family);
642 setNonBlocking(ret);
643 }
644 catch(...) {
645 closesocket(ret);
646 throw;
647 }
3ddb9247 648
d8f6d49f
BH
649 return ret;
650 }
49a699c4
BH
651};
652
f26bf547 653static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 654
288f4aa9 655/* these two functions are used by LWRes */
34801ab1 656// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 657int asendto(const char *data, size_t len, int flags,
3ddb9247 658 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 659{
34801ab1
BH
660
661 PacketID pident;
787e5eab
BH
662 pident.domain = domain;
663 pident.remote = toaddr;
664 pident.type = qtype;
34801ab1
BH
665
666 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
667 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
668
669 for(; chain.first != chain.second; chain.first++) {
670 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 671 /*
4665c31e
BH
672 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
673 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 674 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 675 */
34801ab1
BH
676 chain.first->key.chain.insert(id); // we can chain
677 *fd=-1; // gets used in waitEvent / sendEvent later on
678 return 1;
679 }
680 }
681
49a699c4 682 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
683 if(ret < 0)
684 return ret;
34801ab1 685
998a4334
BH
686 pident.fd=*fd;
687 pident.id=id;
3ddb9247 688
bb4bdbaf
BH
689 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
690 ret = send(*fd, data, len, 0);
691
5b0ddd18 692 int tmp = errno;
bb4bdbaf 693
7302ed0a 694 if(ret < 0)
49a699c4 695 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 696
5b0ddd18 697 errno = tmp; // this is for logging purposes only
7302ed0a 698 return ret;
288f4aa9
BH
699}
700
9170fbaf 701// -1 is error, 0 is timeout, 1 is success
f128d20d 702int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 703 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 704{
0d5f0a9f 705 static optional<unsigned int> nearMissLimit;
3ddb9247 706 if(!nearMissLimit)
0d5f0a9f
BH
707 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
708
288f4aa9 709 PacketID pident;
4ef015cd 710 pident.fd=fd;
288f4aa9 711 pident.id=id;
0d5f0a9f 712 pident.domain=domain;
787e5eab 713 pident.type = qtype;
996c89cc 714 pident.remote=fromaddr;
b636533b 715
5b0ddd18 716 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 717
9ec48f21 718 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
9170fbaf 719 if(ret > 0) {
9ec48f21 720 /* handleUDPServerResponse() will close the socket for us no matter what */
996c89cc 721 if(packet.empty()) // means "error"
3ddb9247 722 return -1;
998a4334 723
a683e8bd 724 *d_len=packet.size();
f128d20d 725
0d5f0a9f 726 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 727 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 728 g_stats.spoofCount++;
35ce8576
BH
729 return -1;
730 }
288f4aa9 731 }
09e6702a 732 else {
9ec48f21 733 /* getting there means error or timeout, it's up to us to close the socket */
34801ab1 734 if(fd >= 0)
49a699c4 735 t_udpclientsocks->returnSocket(fd);
09e6702a 736 }
9170fbaf 737 return ret;
288f4aa9
BH
738}
739
88def049
BH
740static void writePid(void)
741{
191f2e47 742 if(!::arg().mustDo("write-pid"))
743 return;
18e7758c 744 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 745 if(of)
705f31ae 746 of<< Utility::getpid() <<endl;
a2a81d42
OM
747 else {
748 int err = errno;
749 g_log << Logger::Error << "Writing pid for " << Utility::getpid() << " to " << s_pidfname << " failed: "
296ddbfe 750 << stringerror(err) << endl;
a2a81d42 751 }
88def049
BH
752}
753
c51c551e
OM
754uint16_t TCPConnection::s_maxInFlight;
755
2749c3fe 756TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
757{
758 ++s_currentConnections;
cd989c87 759 (*t_tcpClientCounts)[d_remote]++;
0e408828 760}
cd989c87
BH
761
762TCPConnection::~TCPConnection()
0e408828 763{
a7b68ae7
RG
764 try {
765 if(closesocket(d_fd) < 0)
e6a9dde5 766 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
767 }
768 catch(const PDNSException& e) {
e6a9dde5 769 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
770 }
771
3ddb9247 772 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 773 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 774 --s_currentConnections;
0e408828 775}
0e9d9ce2 776
3ddb9247 777AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
778
779static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 780
92011b8f 781// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 782static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 783{
92011b8f 784 if(packetsize > 1000 && t_largeanswerremotes)
785 t_largeanswerremotes->push_back(remote);
2cc13433
BH
786 switch(res) {
787 case RCode::ServFail:
92011b8f 788 if(t_servfailremotes) {
789 t_servfailremotes->push_back(remote);
5af86fdc 790 if(query && t_servfailqueryring) // packet cache
92011b8f 791 t_servfailqueryring->push_back(make_pair(*query, qtype));
792 }
2cc13433
BH
793 g_stats.servFails++;
794 break;
795 case RCode::NXDomain:
796 g_stats.nxDomains++;
797 break;
798 case RCode::NoError:
799 g_stats.noErrors++;
800 break;
801 }
802}
803
9a864da4 804static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 805try
806{
5cc8371b 807 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 808}
809catch(...)
810{
811 return "Exception making error message for exception";
812}
813
aa7929a3 814#ifdef HAVE_PROTOBUF
b502d522 815static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::unordered_set<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId, const std::string& deviceName)
aa7929a3 816{
b773359c
RG
817 if (!t_protobufServers) {
818 return;
819 }
820
e1c8a4bb 821 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
79816288 822 ComboAddress requestor = requestorNM.getMaskedNetwork();
5d2e9a83 823 requestor.setPort(remote.getPort());
e1c8a4bb 824 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 825 message.setServerIdentity(SyncRes::s_serverID);
d14121a8 826 message.setEDNSSubnet(ednssubnet, ednssubnet.isIPv4() ? maskV4 : maskV6);
67e31ebe 827 message.setRequestorId(requestorId);
590388d2 828 message.setDeviceId(deviceId);
0a6a45c8 829 message.setDeviceName(deviceName);
02b47f43 830
02b47f43 831 if (!policyTags.empty()) {
d9d3f9c1 832 message.setPolicyTags(policyTags);
02b47f43 833 }
aa7929a3 834
d9d3f9c1 835// cerr <<message.toDebugString()<<endl;
aa7929a3 836 std::string str;
d9d3f9c1 837 message.serialize(str);
b773359c
RG
838
839 for (auto& server : *t_protobufServers) {
840 server->queueData(str);
841 }
aa7929a3
RG
842}
843
b773359c 844static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 845{
b773359c
RG
846 if (!t_protobufServers) {
847 return;
848 }
849
d9d3f9c1 850// cerr <<message.toDebugString()<<endl;
aa7929a3 851 std::string str;
d9d3f9c1 852 message.serialize(str);
b773359c
RG
853
854 for (auto& server : *t_protobufServers) {
855 server->queueData(str);
856 }
aa7929a3
RG
857}
858#endif
859
53508135
PL
860/**
861 * Chases the CNAME provided by the PolicyCustom RPZ policy.
862 *
863 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
864 * @param qtype: The QType of the original query
865 * @param sr: A SyncRes
866 * @param res: An integer that will contain the RCODE of the lookup we do
867 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
868 */
d187038c 869static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
870{
871 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
872 bool oldWantsRPZ = sr.getWantsRPZ();
873 sr.setWantsRPZ(false);
53508135 874 vector<DNSRecord> ans;
6da513b2 875 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
876 for (const auto& rec : ans) {
877 if(rec.d_place == DNSResourceRecord::ANSWER) {
878 ret.push_back(rec);
879 }
880 }
881 // Reset the RPZ state of the SyncRes
30ee601a 882 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
883 }
884}
885
70fb28d9 886static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 887{
70fb28d9 888 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
889
890 if(rec.d_type != QType::OPT) // their TTL ain't real
891 minTTL = min(minTTL, rec.d_ttl);
892
893 rec.d_content->toPacket(pw);
894 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
895 pw.rollback();
896 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
897 pw.getHeader()->tc=1;
898 pw.truncate();
899 }
900 return false;
901 }
902
903 return true;
904}
905
63341e8d 906#ifdef HAVE_PROTOBUF
3fe06137 907static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 908{
3fe06137 909 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
910
911 for (const auto& server : config.servers) {
912 try {
5d6c7a46
RG
913 auto logger = make_unique<RemoteLogger>(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
914 logger->setLogQueries(config.logQueries);
915 logger->setLogResponses(config.logResponses);
916 result->emplace_back(std::move(logger));
b773359c
RG
917 }
918 catch(const std::exception& e) {
919 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
920 }
921 catch(const PDNSException& e) {
922 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
923 }
63341e8d
RG
924 }
925
926 return result;
927}
928
929static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
930{
931 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
932 if (t_protobufServers) {
933 for (auto& server : *t_protobufServers) {
934 server->stop();
935 }
936 t_protobufServers.reset();
63341e8d
RG
937 }
938
939 return false;
940 }
941
942 /* if the server was not running, or if it was running according to a
943 previous configuration */
b773359c
RG
944 if (!t_protobufServers ||
945 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 946
b773359c
RG
947 if (t_protobufServers) {
948 for (auto& server : *t_protobufServers) {
949 server->stop();
950 }
63341e8d 951 }
b773359c 952 t_protobufServers.reset();
63341e8d 953
b773359c
RG
954 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
955 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
956 }
957
958 return true;
959}
960
961static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
962{
963 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
964 if (t_outgoingProtobufServers) {
965 for (auto& server : *t_outgoingProtobufServers) {
966 server->stop();
967 }
63341e8d 968 }
b773359c 969 t_outgoingProtobufServers.reset();
63341e8d
RG
970
971 return false;
972 }
973
974 /* if the server was not running, or if it was running according to a
975 previous configuration */
b773359c
RG
976 if (!t_outgoingProtobufServers ||
977 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 978
b773359c
RG
979 if (t_outgoingProtobufServers) {
980 for (auto& server : *t_outgoingProtobufServers) {
981 server->stop();
982 }
63341e8d 983 }
b773359c 984 t_outgoingProtobufServers.reset();
63341e8d 985
b773359c
RG
986 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
987 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
988 }
989
990 return true;
991}
b9fa43e0
OM
992
993#ifdef HAVE_FSTRM
994
10ba6d01 995static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
b9fa43e0 996{
10ba6d01 997 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
b9fa43e0
OM
998
999 for (const auto& server : config.servers) {
1000 try {
573f4ff0
OM
1001 std::unordered_map<string,unsigned> options;
1002 options["bufferHint"] = config.bufferHint;
1003 options["flushTimeout"] = config.flushTimeout;
1004 options["inputQueueSize"] = config.inputQueueSize;
1005 options["outputQueueSize"] = config.outputQueueSize;
1006 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
1007 options["reopenInterval"] = config.reopenInterval;
dea8a6bc
OM
1008 FrameStreamLogger *fsl = nullptr;
1009 try {
1010 ComboAddress address(server);
1011 fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
1012 }
1013 catch (const PDNSException& e) {
1014 fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
1015 }
573f4ff0
OM
1016 fsl->setLogQueries(config.logQueries);
1017 fsl->setLogResponses(config.logResponses);
1018 result->emplace_back(fsl);
b9fa43e0
OM
1019 }
1020 catch(const std::exception& e) {
1021 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
1022 }
1023 catch(const PDNSException& e) {
1024 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
1025 }
1026 }
1027
1028 return result;
1029}
1030
1031static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1032{
1033 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1034 if (t_frameStreamServers) {
1035 // dt's take care of cleanup
1036 t_frameStreamServers.reset();
1037 }
1038
1039 return false;
1040 }
1041
1042 /* if the server was not running, or if it was running according to a
1043 previous configuration */
1044 if (!t_frameStreamServers ||
1045 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1046
1047 if (t_frameStreamServers) {
1048 // dt's take care of cleanup
1049 t_frameStreamServers.reset();
1050 }
1051
1052 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1053 t_frameStreamServersGeneration = luaconfsLocal->generation;
1054 }
1055
1056 return true;
1057}
1058#endif /* HAVE_FSTRM */
63341e8d
RG
1059#endif /* HAVE_PROTOBUF */
1060
af1377b7 1061#ifdef NOD_ENABLED
41c542ec 1062static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
1063{
1064 static const QType qt(QType::A);
1065 static const uint16_t qc(QClass::IN);
41c542ec 1066 bool ret = false;
af1377b7
NC
1067 // First check the (sub)domain isn't whitelisted for NOD purposes
1068 if (!g_nodDomainWL.check(dname)) {
ef2ea4bf 1069 // Now check the NODDB (note this is probabilistic so can have FNs/FPs)
af1377b7
NC
1070 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1071 if (g_nodLog) {
1072 // This should probably log to a dedicated log file
1073 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
1074 }
1075 if (!(g_nodLookupDomain.isRoot())) {
1076 // Send a DNS A query to <domain>.g_nodLookupDomain
1077 DNSName qname = dname;
1078 vector<DNSRecord> dummy;
1079 qname += g_nodLookupDomain;
1080 directResolve(qname, qt, qc, dummy);
1081 }
41c542ec 1082 ret = true;
af1377b7
NC
1083 }
1084 }
41c542ec 1085 return ret;
af1377b7
NC
1086}
1087
41c542ec
NC
1088static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1089{
1090 bool ret = false;
1091 if (record.d_place == DNSResourceRecord::ANSWER ||
1092 record.d_place == DNSResourceRecord::ADDITIONAL) {
1093 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1094 std::stringstream ss;
1095 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1096 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1097 if (g_udrLog) {
1098 // This should also probably log to a dedicated file.
1099 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1100 }
1101 ret = true;
1102 }
1103 }
1104 return ret;
1105}
af1377b7
NC
1106#endif /* NOD_ENABLED */
1107
37a919d4
RG
1108int followCNAMERecords(vector<DNSRecord>& ret, const QType& qtype)
1109{
1110 vector<DNSRecord> resolved;
1111 DNSName target;
1112 for(const DNSRecord& rr : ret) {
1113 if(rr.d_type == QType::CNAME) {
1114 auto rec = getRR<CNAMERecordContent>(rr);
1115 if(rec) {
1116 target=rec->getTarget();
1117 break;
1118 }
1119 }
1120 }
1121
1122 if(target.empty()) {
1123 return 0;
1124 }
1125
1126 int rcode = directResolve(target, qtype, QClass::IN, resolved);
1127
1128 for(DNSRecord& rr : resolved) {
1129 ret.push_back(std::move(rr));
1130 }
1131 return rcode;
1132}
1133
ef3ee606
RG
1134int getFakeAAAARecords(const DNSName& qname, ComboAddress prefix, vector<DNSRecord>& ret)
1135{
1136 int rcode = directResolve(qname, QType(QType::A), QClass::IN, ret);
1137
1138 // Remove double CNAME records
1139 std::set<DNSName> seenCNAMEs;
1140 ret.erase(std::remove_if(
1141 ret.begin(),
1142 ret.end(),
1143 [&seenCNAMEs](DNSRecord& rr) {
1144 if (rr.d_type == QType::CNAME) {
1145 auto target = getRR<CNAMERecordContent>(rr);
1146 if (target == nullptr) {
1147 return false;
1148 }
1149 if (seenCNAMEs.count(target->getTarget()) > 0) {
1150 // We've had this CNAME before, remove it
1151 return true;
1152 }
1153 seenCNAMEs.insert(target->getTarget());
1154 }
1155 return false;
1156 }),
1157 ret.end());
1158
1159 bool seenA = false;
1160 for (DNSRecord& rr : ret) {
1161 if (rr.d_type == QType::A && rr.d_place == DNSResourceRecord::ANSWER) {
1162 if (auto rec = getRR<ARecordContent>(rr)) {
1163 ComboAddress ipv4(rec->getCA());
75e31a0b 1164 memcpy(&prefix.sin6.sin6_addr.s6_addr[12], &ipv4.sin4.sin_addr.s_addr, sizeof(ipv4.sin4.sin_addr.s_addr));
ef3ee606
RG
1165 rr.d_content = std::make_shared<AAAARecordContent>(prefix);
1166 rr.d_type = QType::AAAA;
1167 }
1168 seenA = true;
1169 }
1170 }
1171
1172 if (seenA) {
1173 // We've seen an A in the ANSWER section, so there is no need to keep any
1174 // SOA in the AUTHORITY section as this is not a NODATA response.
1175 ret.erase(std::remove_if(
1176 ret.begin(),
1177 ret.end(),
1178 [](DNSRecord& rr) {
1179 return (rr.d_type == QType::SOA && rr.d_place == DNSResourceRecord::AUTHORITY);
1180 }),
1181 ret.end());
1182 }
1183 return rcode;
1184}
1185
1186int getFakePTRRecords(const DNSName& qname, vector<DNSRecord>& ret)
1187{
1188 /* qname has a reverse ordered IPv6 address, need to extract the underlying IPv4 address from it
1189 and turn it into an IPv4 in-addr.arpa query */
1190 ret.clear();
1191 vector<string> parts = qname.getRawLabels();
1192
1193 if (parts.size() < 8) {
1194 return -1;
1195 }
1196
1197 string newquery;
1198 for (int n = 0; n < 4; ++n) {
1199 newquery +=
1200 std::to_string(stoll(parts[n*2], 0, 16) + 16*stoll(parts[n*2+1], 0, 16));
1201 newquery.append(1, '.');
1202 }
1203 newquery += "in-addr.arpa.";
1204
1205 DNSRecord rr;
1206 rr.d_name = qname;
1207 rr.d_type = QType::CNAME;
1208 rr.d_content = std::make_shared<CNAMERecordContent>(newquery);
1209 ret.push_back(rr);
1210
1211 int rcode = directResolve(DNSName(newquery), QType(QType::PTR), QClass::IN, ret);
1212
1213 return rcode;
1214}
1215
b502d522
RG
/* What handlePolicyHit() decided for a query */
enum class PolicyResult : uint8_t
{
  NoAction,   // carry on with normal processing
  HaveAnswer, // the response has been filled in
  Drop        // the query is to be dropped
};
1217
1218static PolicyResult handlePolicyHit(const DNSFilterEngine::Policy& appliedPolicy, const std::unique_ptr<DNSComboWriter>& dc, SyncRes& sr, int& res, vector<DNSRecord>& ret, DNSPacketWriter& pw)
1219{
3dd06bc7
RG
1220 /* don't account truncate actions for TCP queries, since they are not applied */
1221 if (appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::Truncate || !dc->d_tcp) {
1222 ++g_stats.policyResults[appliedPolicy.d_kind];
1223 }
b502d522
RG
1224
1225 switch (appliedPolicy.d_kind) {
1226
1227 case DNSFilterEngine::PolicyKind::NoAction:
1228 return PolicyResult::NoAction;
1229
1230 case DNSFilterEngine::PolicyKind::Drop:
1231 ++g_stats.policyDrops;
1232 return PolicyResult::Drop;
1233
1234 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1235 ret.clear();
1236 res = RCode::NXDomain;
1237 return PolicyResult::HaveAnswer;
1238
1239 case DNSFilterEngine::PolicyKind::NODATA:
1240 ret.clear();
1241 res = RCode::NoError;
1242 return PolicyResult::HaveAnswer;
1243
1244 case DNSFilterEngine::PolicyKind::Truncate:
1245 if (!dc->d_tcp) {
1246 ret.clear();
1247 res = RCode::NoError;
1248 pw.getHeader()->tc = 1;
1249 return PolicyResult::HaveAnswer;
1250 }
1251 return PolicyResult::NoAction;
1252
1253 case DNSFilterEngine::PolicyKind::Custom:
1254 ret.clear();
1255 res = RCode::NoError;
1256 {
1257 auto spoofed = appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1258 for (auto& dr : spoofed) {
1259 ret.push_back(dr);
1260 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1261 }
1262 }
1263 return PolicyResult::HaveAnswer;
1264 }
1265
1266 return PolicyResult::NoAction;
1267}
1268
d187038c 1269static void startDoResolve(void *p)
288f4aa9 1270{
9a864da4 1271 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1272 try {
5af86fdc
RG
1273 if (t_queryring)
1274 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1275
32015748 1276 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1277 EDNSOpts edo;
5164bac3 1278 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1279 bool variableAnswer = dc->d_variable;
8e079f3a 1280 bool haveEDNS=false;
ca2526f5
NC
1281#ifdef NOD_ENABLED
1282 bool hasUDR = false;
1283#endif /* NOD_ENABLED */
f1db0de2
PL
1284 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1285 uint8_t ednsExtRCode = 0;
8e079f3a 1286 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1287 haveEDNS=true;
1288 if (edo.d_version != 0) {
1289 ednsExtRCode = ERCode::BADVERS;
1290 }
1291
32015748
RG
1292 if(!dc->d_tcp) {
1293 /* rfc6891 6.2.3:
1294 "Values lower than 512 MUST be treated as equal to 512."
1295 */
1296 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1297 }
5164bac3 1298 ednsOpts = edo.d_options;
3af35968 1299 maxanswersize -= 11; // EDNS header size
b40562da 1300
1f691b94
PL
1301 for (const auto& o : edo.d_options) {
1302 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1303 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1304 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1305 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1306 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1307 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1308 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1309 variableAnswer = true; // Can't packetcache an answer with NSID
1310 // Option Code and Option Length are both 2
1311 maxanswersize -= 2 + 2 + mode_server_id.size();
1312 }
b40562da
RG
1313 }
1314 }
10321a98 1315 }
b40562da
RG
1316 /* perhaps there was no EDNS or no ECS but by now we looked */
1317 dc->d_ecsParsed = true;
e325f20c 1318 vector<DNSRecord> ret;
ea634573 1319 vector<uint8_t> packet;
b23b8614 1320
ad42489c 1321 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1322 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1323 bool wantsRPZ(true);
1fbc6dc5 1324 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
aa7929a3 1325#ifdef HAVE_PROTOBUF
63341e8d 1326 if (checkProtobufExport(luaconfsLocal)) {
5cc8371b 1327 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
5d2e9a83
RG
1328 ComboAddress requestor = requestorNM.getMaskedNetwork();
1329 requestor.setPort(dc->d_source.getPort());
0bd2e252 1330 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1331 pbMessage->setServerIdentity(SyncRes::s_serverID);
d14121a8 1332 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1333 }
1334#endif /* HAVE_PROTOBUF */
ad42489c 1335
b9fa43e0
OM
1336#ifdef HAVE_FSTRM
1337 checkFrameStreamExport(luaconfsLocal);
1338#endif
1339
3ddb9247 1340 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1341
1342 pw.getHeader()->aa=0;
1343 pw.getHeader()->ra=1;
c154c8a4 1344 pw.getHeader()->qr=1;
bb4bdbaf 1345 pw.getHeader()->tc=0;
ea634573 1346 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1347 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1348 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1349
70fb28d9
RG
1350 /* This is the lowest TTL seen in the records of the response,
1351 so we can't cache it for longer than this value.
1352 If we have a TTL cap, this value can't be larger than the
1353 cap no matter what. */
1354 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1355
1356 SyncRes sr(dc->d_now);
37a919d4 1357 sr.setId(MT->getTid());
0c43f455 1358
2e921ec6 1359 bool DNSSECOK=false;
3457a2a0 1360 if(t_pdl) {
f26bf547 1361 sr.setLuaEngine(t_pdl);
3457a2a0 1362 }
9eec8c98 1363 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1364 sr.setDoDNSSEC(true);
9eec8c98
PL
1365
1366 // Does the requestor want DNSSEC records?
d6c335ab 1367 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1368 DNSSECOK=true;
1369 g_stats.dnssecQueries++;
1370 }
88c33dca
RG
1371 if (dc->d_mdp.d_header.cd) {
1372 /* Per rfc6840 section 5.9, "When processing a request with
1373 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1374 to return all response data, even data that has failed DNSSEC
1375 validation. */
1376 ++g_stats.dnssecCheckDisabledQueries;
1377 }
1378 if (dc->d_mdp.d_header.ad) {
1379 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1380 indicating that the requester understands and is interested in the
1381 value of the AD bit in the response. This allows a requester to
1382 indicate that it understands the AD bit without also requesting
1383 DNSSEC data via the DO bit. */
1384 ++g_stats.dnssecAuthenticDataQueries;
1385 }
9eec8c98
PL
1386 } else {
1387 // Ignore the client-set CD flag
1388 pw.getHeader()->cd=0;
5b9853c9 1389 }
0c43f455
RG
1390 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1391
4898a348 1392#ifdef HAVE_PROTOBUF
30ee601a 1393 sr.setInitialRequestId(dc->d_uuid);
b773359c 1394 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1395#endif
b9fa43e0
OM
1396#ifdef HAVE_FSTRM
1397 sr.setFrameStreamServers(t_frameStreamServers);
1398#endif
2fe3354d 1399 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1400
904d3219 1401 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1402 bool shouldNotValidate = false;
904d3219 1403
ef3b6cd7
RG
1404 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1405 int res = RCode::NoError;
37a919d4 1406
1f1ca368 1407 DNSFilterEngine::Policy appliedPolicy;
406b722e 1408 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, dc->d_logResponse);
d6c335ab 1409 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1410 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1411 dq.tag = dc->d_tag;
1412 dq.discardedPolicies = &sr.d_discardedPolicies;
1413 dq.policyTags = &dc->d_policyTags;
1414 dq.appliedPolicy = &appliedPolicy;
1415 dq.currentRecords = &ret;
1416 dq.dh = &dc->d_mdp.d_header;
05c74122 1417 dq.data = dc->d_data;
67e31ebe
RG
1418#ifdef HAVE_PROTOBUF
1419 dq.requestorId = dc->d_requestorId;
590388d2 1420 dq.deviceId = dc->d_deviceId;
0a6a45c8 1421 dq.deviceName = dc->d_deviceName;
67e31ebe 1422#endif
38d8b937 1423 dq.proxyProtocolValues = &dc->d_proxyProtocolValues;
ba21fcfe 1424
6cf96227
PL
1425 if(ednsExtRCode != 0) {
1426 goto sendit;
1427 }
1428
e661a20b 1429 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1430 pw.getHeader()->tc = 1;
1431 res = 0;
1432 variableAnswer = true;
e661a20b
PD
1433 goto sendit;
1434 }
1435
f26bf547 1436 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1437 sr.setLogMode(SyncRes::Store);
1438 tracedQuery=true;
1439 }
3ddb9247 1440
976ec823 1441 if(!g_quiet || tracedQuery) {
e6a9dde5 1442 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1443 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1444 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1445 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1446 }
e6a9dde5 1447 g_log<<endl;
976ec823 1448 }
c75a6a9e 1449
37a919d4 1450 if(!dc->d_mdp.d_header.rd) {
c836dc19 1451 sr.setCacheOnly();
37a919d4
RG
1452 }
1453
1454 if (dc->d_rcode != boost::none) {
1455 /* we have a response ready to go, most likely from gettag_ffi */
1456 ret = std::move(dc->d_records);
1457 res = *dc->d_rcode;
1458 if (res == RCode::NoError && dc->d_followCNAMERecords) {
1459 res = followCNAMERecords(ret, QType(dc->d_mdp.d_qtype));
1460 }
1461 goto haveAnswer;
1462 }
c836dc19 1463
f26bf547
RG
1464 if (t_pdl) {
1465 t_pdl->prerpz(dq, res);
0a273054
RG
1466 }
1467
db486de5 1468 // Check if the query has a policy attached to it
e37e5795 1469 if (wantsRPZ && (appliedPolicy.d_type == DNSFilterEngine::PolicyType::None || appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction)) {
b502d522
RG
1470 if (luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies, appliedPolicy)) {
1471 mergePolicyTags(dc->d_policyTags, appliedPolicy.getTags());
1472 }
0a273054 1473 }
644dd1da 1474
54be222b 1475 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
ef3ee606
RG
1476 if (!t_pdl || !t_pdl->preresolve(dq, res)) {
1477
1478 if (!g_dns64PrefixReverse.empty() && dq.qtype == QType::PTR && dq.qname.isPartOf(g_dns64PrefixReverse)) {
1479 res = getFakePTRRecords(dq.qname, ret);
1480 goto haveAnswer;
1481 }
b8470add 1482
30ee601a 1483 sr.setWantsRPZ(wantsRPZ);
b502d522
RG
1484 if (wantsRPZ && appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
1485 auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw);
1486 if (policyResult == PolicyResult::HaveAnswer) {
1487 goto haveAnswer;
1488 }
1489 else if (policyResult == PolicyResult::Drop) {
1490 return;
b8470add 1491 }
db486de5
PL
1492 }
1493
b8470add 1494 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0 1495 try {
124dd1d4 1496 sr.d_appliedPolicy = appliedPolicy;
b502d522 1497 sr.d_policyTags = std::move(dc->d_policyTags);
d6fd3cb8 1498
163ed916
OM
1499 if (!dc->d_routingTag.empty()) {
1500 sr.d_routingTag = dc->d_routingTag;
d6fd3cb8
OM
1501 }
1502
44971ca0 1503 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1504 shouldNotValidate = sr.wasOutOfBand();
44971ca0 1505 }
124dd1d4
RG
1506 catch(const ImmediateServFailException &e) {
1507 if(g_logCommonErrors) {
e6a9dde5 1508 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
124dd1d4 1509 }
44971ca0
PD
1510 res = RCode::ServFail;
1511 }
124dd1d4
RG
1512 catch(const PolicyHitException& e) {
1513 res = -2;
1514 }
1921a4c2 1515 dq.validationState = sr.getValidationState();
2996400c 1516 appliedPolicy = sr.d_appliedPolicy;
b502d522 1517 dc->d_policyTags = std::move(sr.d_policyTags);
1921a4c2 1518
b8470add
PL
1519 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1520 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
b502d522
RG
1521 if (appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction) {
1522 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1523 }
1524 auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw);
1525 if (policyResult == PolicyResult::HaveAnswer) {
1526 goto haveAnswer;
1527 }
1528 else if (policyResult == PolicyResult::Drop) {
1529 return;
b8470add
PL
1530 }
1531 }
1532
e37e5795 1533 if (wantsRPZ && (appliedPolicy.d_type == DNSFilterEngine::PolicyType::None || appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction)) {
b502d522
RG
1534 if (luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies, appliedPolicy)) {
1535 mergePolicyTags(dc->d_policyTags, appliedPolicy.getTags());
1536 }
b8470add 1537 }
db486de5 1538
75e31a0b 1539 if (t_pdl || (g_dns64Prefix && dq.qtype == QType::AAAA && dq.validationState != Bogus)) {
ef3ee606
RG
1540 if (res == RCode::NoError) {
1541 auto i = ret.cbegin();
1542 for(; i!= ret.cend(); ++i) {
1543 if (i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER) {
1544 break;
1545 }
1546 }
1547
1548 if (i == ret.cend()) {
1549 /* no record in the answer section, NODATA */
1550 if (t_pdl && t_pdl->nodata(dq, res)) {
1551 shouldNotValidate = true;
1552 }
1553 else if (g_dns64Prefix && dq.qtype == QType::AAAA && dq.validationState != Bogus) {
1554 res = getFakeAAAARecords(dq.qname, *g_dns64Prefix, ret);
1555 shouldNotValidate = true;
1556 }
1557 }
3ca4e735 1558
db486de5 1559 }
ef3ee606 1560 else if(res == RCode::NXDomain && t_pdl && t_pdl->nxdomain(dq, res)) {
3ca4e735 1561 shouldNotValidate = true;
ef3ee606 1562 }
db486de5 1563
ef3ee606 1564 if (t_pdl && t_pdl->postresolve(dq, res)) {
3ca4e735 1565 shouldNotValidate = true;
ef3ee606 1566 }
db486de5
PL
1567 }
1568
b8470add 1569 if (wantsRPZ) { //XXX This block is repeated, see above
b502d522
RG
1570
1571 auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw);
1572 if (policyResult == PolicyResult::HaveAnswer) {
1573 goto haveAnswer;
1574 }
1575 else if (policyResult == PolicyResult::Drop) {
1576 return;
b8470add 1577 }
644dd1da 1578 }
4485aa35 1579 }
644dd1da 1580 haveAnswer:;
3e8216c8 1581 if(res == PolicyDecision::DROP) {
e9c2ad3a 1582 g_stats.policyDrops++;
ae7e77ad 1583 return;
3ddb9247 1584 }
9cdfab64 1585 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1586 {
85ffbc53
PD
1587 string trace(sr.getTrace());
1588 if(!trace.empty()) {
1589 vector<string> lines;
1590 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1591 for(const string& line : lines) {
85ffbc53 1592 if(!line.empty())
e6a9dde5 1593 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1594 }
1595 }
1596 }
3ddb9247 1597
9cdfab64 1598 if(res == -1) {
0fe1d080
PD
1599 pw.getHeader()->rcode=RCode::ServFail;
1600 // no commit here, because no record
1601 g_stats.servFails++;
1602 }
288f4aa9 1603 else {
ea634573 1604 pw.getHeader()->rcode=res;
92011b8f 1605
f3fe4ae6 1606 // Does the validation mode or query demand validation?
0c43f455 1607 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1608 try {
f3fe4ae6 1609 if(sr.doLog()) {
e6a9dde5 1610 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1611 }
4d2be65d
RG
1612
1613 auto state = sr.getValidationState();
1614
b25cae9a 1615 if(state == Secure) {
2e921ec6 1616 if(sr.doLog()) {
e6a9dde5 1617 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1618 }
b25cae9a 1619
1620 // Is the query source interested in the value of the ad-bit?
885c8881 1621 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1622 pw.getHeader()->ad=1;
1623 }
1624 else if(state == Insecure) {
f3fe4ae6 1625 if(sr.doLog()) {
e6a9dde5 1626 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1627 }
b25cae9a 1628
1629 pw.getHeader()->ad=0;
f3fe4ae6 1630 }
b25cae9a 1631 else if(state == Bogus) {
66f2e6ad
KM
1632 if(t_bogusremotes)
1633 t_bogusremotes->push_back(dc->d_source);
1634 if(t_bogusqueryring)
1635 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1636 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1637 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1638 }
1639
1640 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1641 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1642 if(sr.doLog()) {
e6a9dde5 1643 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1644 }
1645
1646 pw.getHeader()->rcode=RCode::ServFail;
1647 goto sendit;
1648 } else {
1649 if(sr.doLog()) {
e6a9dde5 1650 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1651 }
1652 }
1653 }
1654 }
124dd1d4 1655 catch(const ImmediateServFailException &e) {
b25cae9a 1656 if(g_logCommonErrors)
e6a9dde5 1657 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1658 pw.getHeader()->rcode=RCode::ServFail;
1659 goto sendit;
f3fe4ae6 1660 }
b3f0ed10 1661 }
1662
c154c8a4 1663 if(ret.size()) {
bbec1961 1664 pdns::orderAndShuffle(ret);
5cc8371b 1665 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1666 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1667 variableAnswer=true;
1668 }
8e079f3a 1669 }
0afa32d4
RG
1670
1671 bool needCommit = false;
8e079f3a 1672 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1673 if( ! DNSSECOK &&
1674 ( i->d_type == QType::NSEC3 ||
1675 (
1676 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1677 (
1678 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1679 i->d_place != DNSResourceRecord::ANSWER
1680 )
1681 )
1682 )
1683 ) {
2e921ec6 1684 continue;
3e80ebce
KM
1685 }
1686
70fb28d9 1687 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1688 needCommit = false;
1689 break;
1690 }
1691 needCommit = true;
1692
41c542ec
NC
1693#ifdef NOD_ENABLED
1694 bool udr = false;
1695 if (g_udrEnabled) {
1696 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1697 if (!hasUDR && udr)
1698 hasUDR = true;
41c542ec
NC
1699 }
1700#endif /* NOD ENABLED */
1701
aa7929a3 1702#ifdef HAVE_PROTOBUF
b773359c 1703 if (t_protobufServers) {
41c542ec
NC
1704#ifdef NOD_ENABLED
1705 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1706#else
0bd2e252 1707 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1708#endif /* NOD_ENABLED */
aa7929a3
RG
1709 }
1710#endif
ea634573 1711 }
0afa32d4 1712 if(needCommit)
8e079f3a 1713 pw.commit();
288f4aa9 1714 }
10321a98 1715 sendit:;
b3f0ed10 1716
a0ddd130 1717 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
9837850d 1718 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
5a7f99b4 1719 EDNSSubnetOpts eo;
1720 eo.source = dc->d_ednssubnet.source;
1721 ComboAddress sa;
1ef18cab 1722 sa.reset();
5a7f99b4 1723 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1724 eo.scope = Netmask(sa, 0);
1725
1726 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1727 }
1728
97c6d7e5
RG
1729 if (haveEDNS) {
1730 /* we try to add the EDNS OPT RR even for truncated answers,
1731 as rfc6891 states:
1732 "The minimal response MUST be the DNS header, question section, and an
1733 OPT record. This MUST also occur when a truncated response (using
1734 the DNS header's TC bit) is returned."
1735 */
9b60fb71 1736 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1737 pw.commit();
97c6d7e5
RG
1738 }
1739
79332bff 1740 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1741 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1742#ifdef NOD_ENABLED
1743 bool nod = false;
1744 if (g_nodEnabled) {
1745 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1746 nod = true;
1747 }
1748#endif /* NOD_ENABLED */
aa7929a3 1749#ifdef HAVE_PROTOBUF
b502d522 1750 if (t_protobufServers && !(luaconfsLocal->protobufExportConfig.taggedOnly && appliedPolicy.getName().empty() && dc->d_policyTags.empty())) {
d362f7c1
RG
1751 pbMessage->setBytes(packet.size());
1752 pbMessage->setResponseCode(pw.getHeader()->rcode);
b502d522
RG
1753 if (!appliedPolicy.getName().empty()) {
1754 pbMessage->setAppliedPolicy(appliedPolicy.getName());
d362f7c1 1755 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1756 }
d362f7c1 1757 pbMessage->setPolicyTags(dc->d_policyTags);
c29d820c
RG
1758 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1759 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1760 }
1761 else {
1762 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1763 }
d362f7c1
RG
1764 pbMessage->setRequestorId(dq.requestorId);
1765 pbMessage->setDeviceId(dq.deviceId);
0a6a45c8 1766 pbMessage->setDeviceName(dq.deviceName);
41c542ec
NC
1767#ifdef NOD_ENABLED
1768 if (g_nodEnabled) {
ca2526f5 1769 if (nod) {
41c542ec 1770 pbMessage->setNOD(true);
ca2526f5
NC
1771 pbMessage->addPolicyTag(g_nod_pbtag);
1772 }
1773 if (hasUDR) {
1774 pbMessage->addPolicyTag(g_udr_pbtag);
1775 }
41c542ec
NC
1776 }
1777#endif /* NOD_ENABLED */
406b722e
RG
1778 if (dc->d_logResponse) {
1779 protobufLogResponse(*pbMessage);
1780 }
ac238ea7 1781#ifdef NOD_ENABLED
ca2526f5
NC
1782 if (g_nodEnabled) {
1783 pbMessage->setNOD(false);
1784 pbMessage->clearUDR();
1785 if (nod)
1786 pbMessage->removePolicyTag(g_nod_pbtag);
1787 if (hasUDR)
1788 pbMessage->removePolicyTag(g_udr_pbtag);
1789 }
ac238ea7 1790#endif /* NOD_ENABLED */
aa7929a3
RG
1791 }
1792#endif
ea634573 1793 if(!dc->d_tcp) {
b71b60ee 1794 struct msghdr msgh;
1795 struct iovec iov;
7bec330a
OM
1796 cmsgbuf_aligned cbuf;
1797 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1798 msgh.msg_control=NULL;
1799
cbc03320 1800 if(g_fromtosockets.count(dc->d_socket)) {
4272d071 1801 addCMsgSrcAddr(&msgh, &cbuf, &dc->d_local, 0);
2c0af54f 1802 }
a2a81d42
OM
1803 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors) {
1804 int err = errno;
1805 g_log << Logger::Warning << "Sending UDP reply to client " << dc->getRemote() << " failed with: "
1806 << strerror(err) << endl;
1807 }
70fb28d9 1808
49dc532e 1809 if(variableAnswer || sr.wasVariable()) {
1ef18cab 1810 g_stats.variableResponses++;
49dc532e 1811 }
3762e821 1812 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1813 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1814 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1815 g_now.tv_sec,
76e2b9e3 1816 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1817 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1818 dq.validationState,
08b02366
RG
1819 dc->d_ecsBegin,
1820 dc->d_ecsEnd,
4b0bdd5f 1821 std::move(pbMessage));
1051f8a9 1822 }
3762e821 1823 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1824 }
9c495589
BH
1825 else {
1826 char buf[2];
ea634573
BH
1827 buf[0]=packet.size()/256;
1828 buf[1]=packet.size()%256;
feccc9fc 1829
c038218b 1830 Utility::iovec iov[2];
feccc9fc 1831
ea634573
BH
1832 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1833 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1834
dd079764 1835 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1836 bool hadError=true;
feccc9fc 1837
dd079764 1838 if(wret == 0)
e6a9dde5 1839 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
a2a81d42
OM
1840 else if(wret < 0 ) {
1841 int err = errno;
1842 g_log << Logger::Error << "Error writing TCP answer to " << dc->getRemote() << ": " << strerror(err) << endl;
1843 } else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1844 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1845 else
18af64a8 1846 hadError=false;
3ddb9247 1847
b5b94beb 1848 // update tcp connection status, closing if needed and doing the fd multiplexer accounting
c51c551e
OM
1849 if (dc->d_tcpConnection->d_requestsInFlight > 0) {
1850 dc->d_tcpConnection->d_requestsInFlight--;
1851 }
3ddb9247 1852
b5b94beb
OM
1853 // In the code below, we try to remove the fd from the set, but
1854 // we don't know if another mthread already did the remove, so we can get a
1855 // "Tried to remove unlisted fd" exception. Note that an inflight < limit test
1856 // will not work since we do not know if the other mthread got an error or not.
09e6702a 1857 if(hadError) {
b5b94beb
OM
1858 try {
1859 t_fdm->removeReadFD(dc->d_socket);
1860 }
1861 catch (FDMultiplexerException &) {
1862 }
c36bc97a 1863 dc->d_socket = -1;
09e6702a 1864 }
a6ae6414 1865 else {
fde296a3
RG
1866 dc->d_tcpConnection->queriesCount++;
1867 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
b5b94beb
OM
1868 try {
1869 t_fdm->removeReadFD(dc->d_socket);
1870 }
1871 catch (FDMultiplexerException &) {
1872 }
fde296a3
RG
1873 dc->d_socket = -1;
1874 }
1875 else {
fde296a3 1876 Utility::gettimeofday(&g_now, 0); // needs to be updated
27ae2e3c 1877 struct timeval ttd = g_now;
c51c551e
OM
1878 // If we cross from max to max-1 in flight requests, the fd was not listened to, add it back
1879 if (dc->d_tcpConnection->d_requestsInFlight == TCPConnection::s_maxInFlight - 1) {
3cabb750
OM
1880 // A read error might have happened. If we add the fd back, it will most likely error again.
1881 // This is not a big issue, the next handleTCPClientReadable() will see another read error
1882 // and take action.
d5c6ec95
OM
1883 ttd.tv_sec += g_tcpTimeout;
1884 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
1885 } else {
3cabb750
OM
1886 // fd might have been removed by read error code, so expect an exception
1887 try {
1888 t_fdm->setReadTTD(dc->d_socket, ttd, g_tcpTimeout);
1889 }
1890 catch (FDMultiplexerException &) {
1891 }
d5c6ec95 1892 }
fde296a3 1893 }
0e9d9ce2 1894 }
9c495589 1895 }
2c9119cd 1896 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1897 if(!g_quiet) {
e6a9dde5
PL
1898 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1899 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1900 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1901 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1902
1903 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1904 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1905 }
1906
e6a9dde5 1907 g_log<<endl;
2c9119cd 1908
c75a6a9e 1909 }
b23b8614 1910
f7b8cffa 1911 if (sr.d_outqueries || sr.d_authzonequeries) {
a7956123 1912 s_RC->cacheMisses++;
f7b8cffa
RG
1913 }
1914 else {
a7956123 1915 s_RC->cacheHits++;
f7b8cffa 1916 }
2c9119cd 1917
fe213470
BH
1918 if(spent < 0.001)
1919 g_stats.answers0_1++;
1920 else if(spent < 0.010)
1921 g_stats.answers1_10++;
1922 else if(spent < 0.1)
1923 g_stats.answers10_100++;
1924 else if(spent < 1.0)
1925 g_stats.answers100_1000++;
1926 else
1927 g_stats.answersSlow++;
1928
574af7ea 1929 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1930 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1931 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1932 // no worries, we do this for packet cache hits elsewhere
19178da9 1933
1934 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1935 if(ourtime < 1)
1936 g_stats.ourtime0_1++;
1937 else if(ourtime < 2)
1938 g_stats.ourtime1_2++;
1939 else if(ourtime < 4)
1940 g_stats.ourtime2_4++;
1941 else if(ourtime < 8)
1942 g_stats.ourtime4_8++;
1943 else if(ourtime < 16)
1944 g_stats.ourtime8_16++;
1945 else if(ourtime < 32)
1946 g_stats.ourtime16_32++;
1947 else {
1948 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1949 g_stats.ourtimeSlow++;
1950 }
042da1a1 1951 if(ourtime >= 0.0) {
1952 newLat=ourtime*1000; // usec
1953 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1954 }
c6d04bdc 1955 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1956 }
3f81d239 1957 catch(PDNSException &ae) {
e6a9dde5 1958 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1959 }
16ce7f18
JS
1960 catch(const MOADNSException &mde) {
1961 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1962 }
fdbf35ac 1963 catch(std::exception& e) {
e6a9dde5 1964 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1965
1966 // Luawrapper nests the exception from Lua, so we unnest it here
1967 try {
1968 std::rethrow_if_nested(e);
2010ac95 1969 } catch(const std::exception& ne) {
e6a9dde5 1970 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1971 } catch(...) {}
1972
e6a9dde5 1973 g_log<<endl;
c154c8a4 1974 }
288f4aa9 1975 catch(...) {
e6a9dde5 1976 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1977 }
3ddb9247 1978
ec6eacbc 1979 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1980}
1981
d187038c 1982static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1983{
2d733c0f 1984 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1985 if(processNum >= 0)
335da0ba 1986 sockname += "."+std::to_string(processNum);
677e2a46 1987 sockname+=".controlsocket";
41f7a068 1988 s_rcc.listen(sockname);
3ddb9247 1989
387de317
BH
1990 int sockowner = -1;
1991 int sockgroup = -1;
1992
1993 if (!::arg().isEmpty("socket-group"))
1994 sockgroup=::arg().asGid("socket-group");
1995 if (!::arg().isEmpty("socket-owner"))
1996 sockowner=::arg().asUid("socket-owner");
3ddb9247 1997
f838ad8d
BH
1998 if (sockgroup > -1 || sockowner > -1) {
1999 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
2000 unixDie("Failed to chown control socket");
2001 }
2002 }
387de317
BH
2003
2004 // do mode change if socket-mode is given
2005 if(!::arg().isEmpty("socket-mode")) {
2006 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
2007 if(chmod(sockname.c_str(), sockmode) < 0) {
2008 unixDie("Failed to chmod control socket");
2009 }
387de317 2010 }
1d5b3ce6
BH
2011}
2012
5cc8371b 2013static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 2014 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 2015 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 2016{
59cb4a79 2017 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
2018 const bool lookForECS = ednssubnet != nullptr;
2019 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
2020 size_t questionLen = question.length();
2021 unsigned int consumed=0;
2022 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
2023
2024 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
2025 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
2026 const uint16_t arcount = ntohs(dh->arcount);
2027
2028 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
2029 if (question.at(pos) != 0) {
2030 /* not an OPT or a XPF, bye. */
2031 return;
2032 }
2033
2034 pos += 1;
2035 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
2036 pos += sizeof(dnsrecordheader);
2037
2038 if (pos >= questionLen) {
2039 return;
2040 }
2041
02b47f43 2042 /* OPT root label (1) followed by type (2) */
5cc8371b 2043 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
2044 if (!options) {
2045 char* ecsStart = nullptr;
2046 size_t ecsLen = 0;
5cc8371b
RG
2047 /* we need to pass the record len */
2048 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
2049 if (res == 0 && ecsLen > 4) {
2050 EDNSSubnetOpts eso;
2051 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
2052 *ednssubnet=eso;
5cc8371b 2053 foundECS = true;
00b8cadc
RG
2054 }
2055 }
2056 }
2057 else {
5cc8371b
RG
2058 /* we need to pass the record len */
2059 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
2060 if (res == 0) {
2061 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 2062 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 2063 EDNSSubnetOpts eso;
29e6303a 2064 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 2065 *ednssubnet=eso;
5cc8371b 2066 foundECS = true;
00b8cadc
RG
2067 }
2068 }
02b47f43
RG
2069 }
2070 }
2071 }
59cb4a79 2072 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
2073 if ((questionLen - pos) < ntohs(drh->d_clen)) {
2074 return;
2075 }
2076
2077 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
2078 }
2079
2080 pos += ntohs(drh->d_clen);
02b47f43
RG
2081 }
2082}
2083
5216ddcc
RG
2084static bool handleTCPReadResult(int fd, ssize_t bytes)
2085{
2086 if (bytes == 0) {
2087 /* EOF */
2088 t_fdm->removeReadFD(fd);
2089 return false;
2090 }
2091 else if (bytes < 0) {
2092 if (errno != EAGAIN && errno != EWOULDBLOCK) {
2093 t_fdm->removeReadFD(fd);
2094 return false;
2095 }
2096 }
2097
2098 return true;
2099}
2100
d187038c 2101static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2102{
cd989c87 2103 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 2104
5216ddcc
RG
2105 if (conn->state == TCPConnection::PROXYPROTOCOLHEADER) {
2106 ssize_t bytes = recv(conn->getFD(), &conn->data.at(conn->proxyProtocolGot), conn->proxyProtocolNeed, 0);
2107 if (bytes <= 0) {
2108 handleTCPReadResult(fd, bytes);
2109 return;
2110 }
2111
2112 conn->proxyProtocolGot += bytes;
2113 conn->data.resize(conn->proxyProtocolGot);
2114 ssize_t remaining = isProxyHeaderComplete(conn->data);
2115 if (remaining == 0) {
3bdc4508
RG
2116 if (g_logCommonErrors) {
2117 g_log<<Logger::Error<<"Unable to consume proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
2118 }
5216ddcc
RG
2119 ++g_stats.proxyProtocolInvalidCount;
2120 t_fdm->removeReadFD(fd);
2121 return;
2122 }
2123 else if (remaining < 0) {
2124 conn->proxyProtocolNeed = -remaining;
2125 conn->data.resize(conn->proxyProtocolGot + conn->proxyProtocolNeed);
2126 return;
2127 }
2128 else {
2129 /* proxy header received */
2130 /* we ignore the TCP field for now, but we could properly set whether
0dd02171 2131 the connection was received over UDP or TCP if needed */
5216ddcc 2132 bool tcp;
8c73c703 2133 bool proxy = false;
95f851d6
RG
2134 size_t used = parseProxyHeader(conn->data, proxy, conn->d_source, conn->d_destination, tcp, conn->proxyProtocolValues);
2135 if (used <= 0) {
3bdc4508
RG
2136 if (g_logCommonErrors) {
2137 g_log<<Logger::Error<<"Unable to parse proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
2138 }
2139 ++g_stats.proxyProtocolInvalidCount;
5216ddcc
RG
2140 t_fdm->removeReadFD(fd);
2141 return;
2142 }
95f851d6
RG
2143 else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
2144 if (g_logCommonErrors) {
2145 g_log<<Logger::Error<<"Proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping"<< endl;
2146 }
2147 ++g_stats.proxyProtocolInvalidCount;
2148 t_fdm->removeReadFD(fd);
2149 return;
2150 }
38d8b937 2151
a4888b73
RG
2152 /* Now that we have retrieved the address of the client, as advertised by the proxy
2153 via the proxy protocol header, check that it is allowed by our ACL */
8c73c703 2154 /* note that if the proxy header used a 'LOCAL' command, the original source and destination are untouched so everything should be fine */
38d8b937
RG
2155 if (t_allowFrom && !t_allowFrom->match(&conn->d_source)) {
2156 if (!g_quiet) {
2157 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<conn->d_source.toString()<<", address not matched by allow-from"<<endl;
2158 }
2159
2160 ++g_stats.unauthorizedTCP;
2161 t_fdm->removeReadFD(fd);
2162 return;
2163 }
2164
5216ddcc
RG
2165 conn->data.resize(2);
2166 conn->state = TCPConnection::BYTE0;
2167 }
2168 }
2169
2170 if (conn->state==TCPConnection::BYTE0) {
2749c3fe 2171 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 2172 if(bytes==1)
667f7e60 2173 conn->state=TCPConnection::BYTE1;
3ddb9247 2174 if(bytes==2) {
a0aa4f64 2175 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 2176 conn->data.resize(conn->qlen);
667f7e60
BH
2177 conn->bytesread=0;
2178 conn->state=TCPConnection::GETQUESTION;
09e6702a 2179 }
a4888b73 2180 if (bytes <= 0) {
5216ddcc 2181 handleTCPReadResult(fd, bytes);
09e6702a
BH
2182 return;
2183 }
2184 }
5216ddcc
RG
2185
2186 if (conn->state==TCPConnection::BYTE1) {
2749c3fe 2187 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 2188 if(bytes==1) {
667f7e60 2189 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 2190 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 2191 conn->data.resize(conn->qlen);
667f7e60 2192 conn->bytesread=0;
09e6702a 2193 }
a4888b73 2194 if (bytes <= 0) {
5216ddcc
RG
2195 if (!handleTCPReadResult(fd, bytes)) {
2196 if(g_logCommonErrors) {
2197 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
2198 }
2199 }
09e6702a
BH
2200 return;
2201 }
2202 }
5216ddcc
RG
2203
2204 if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 2205 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
5216ddcc
RG
2206 if (bytes <= 0) {
2207 if (!handleTCPReadResult(fd, bytes)) {
2208 if(g_logCommonErrors) {
2209 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
2210 }
2211 }
2212 return;
2213 }
2214 else if (bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19 2215 if(g_logCommonErrors) {
5216ddcc 2216 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" sent an invalid question size while reading question body"<<endl;
c0f9be19 2217 }
bb4bdbaf 2218 t_fdm->removeReadFD(fd);
09e6702a
BH
2219 return;
2220 }
b841314c 2221 conn->bytesread+=(uint16_t)bytes;
667f7e60 2222 if(conn->bytesread==conn->qlen) {
87ff2287 2223 conn->state = TCPConnection::BYTE0;
9a864da4 2224 std::unique_ptr<DNSComboWriter> dc;
09e6702a 2225 try {
9a864da4 2226 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 2227 }
16ce7f18 2228 catch(const MOADNSException &mde) {
3ddb9247 2229 g_stats.clientParseError++;
4957a608 2230 if(g_logCommonErrors)
e6a9dde5 2231 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 2232 return;
09e6702a 2233 }
cd989c87
BH
2234 dc->d_tcpConnection = conn; // carry the torch
2235 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 2236 dc->d_tcp=true;
5cc8371b 2237 dc->setRemote(conn->d_remote);
5216ddcc 2238 dc->setSource(conn->d_source);
a6147cd2 2239 ComboAddress dest;
d38e2ba9 2240 dest.reset();
a6147cd2 2241 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
2242 socklen_t len = dest.getSocklen();
2243 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
2244 dc->setLocal(dest);
5216ddcc 2245 dc->setDestination(conn->d_destination);
f00de7d2
RG
2246 /* we can't move this if we want to be able to access the values in
2247 all queries sent over this connection */
2248 dc->d_proxyProtocolValues = conn->proxyProtocolValues;
33dcceba
RG
2249 DNSName qname;
2250 uint16_t qtype=0;
2251 uint16_t qclass=0;
2252 bool needECS = false;
5cc8371b 2253 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 2254 string requestorId;
590388d2 2255 string deviceId;
0a6a45c8 2256 string deviceName;
16bbc6e3 2257 bool logQuery = false;
aa7929a3 2258#ifdef HAVE_PROTOBUF
02b47f43 2259 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2260 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
2261 needECS = true;
2262 }
b773359c 2263 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
406b722e 2264 dc->d_logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2265#endif /* HAVE_PROTOBUF */
2266
2267#ifdef HAVE_FSTRM
2268 checkFrameStreamExport(luaconfsLocal);
33dcceba
RG
2269#endif
2270
70fb28d9 2271 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
2272
2273 try {
29e6303a 2274 EDNSOptionViewMap ednsOptions;
5cc8371b 2275 bool xpfFound = false;
b40562da 2276 dc->d_ecsParsed = true;
5cc8371b 2277 dc->d_ecsFound = false;
2749c3fe 2278 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
2279 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2280 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 2281
70fb28d9 2282 if(t_pdl) {
33dcceba 2283 try {
70fb28d9 2284 if (t_pdl->d_gettag_ffi) {
163ed916 2285 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_records, dc->d_data, ednsOptions, true, dc->d_proxyProtocolValues, requestorId, deviceId, deviceName, dc->d_routingTag, dc->d_rcode, dc->d_ttlCap, dc->d_variable, logQuery, dc->d_logResponse, dc->d_followCNAMERecords);
70fb28d9
RG
2286 }
2287 else if (t_pdl->d_gettag) {
163ed916 2288 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName, dc->d_routingTag, dc->d_proxyProtocolValues);
70fb28d9 2289 }
33dcceba 2290 }
70fb28d9 2291 catch(const std::exception& e) {
33dcceba 2292 if(g_logCommonErrors)
e6a9dde5 2293 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2294 }
2295 }
2296 }
70fb28d9 2297 catch(const std::exception& e)
33dcceba
RG
2298 {
2299 if(g_logCommonErrors)
e6a9dde5 2300 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
2301 }
2302 }
f52177c3
RG
2303
2304 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2305
33dcceba 2306#ifdef HAVE_PROTOBUF
b773359c 2307 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 2308 dc->d_requestorId = requestorId;
590388d2 2309 dc->d_deviceId = deviceId;
0a6a45c8 2310 dc->d_deviceName = deviceName;
d61aa945 2311 dc->d_uuid = getUniqueID();
4898a348 2312 }
02b47f43 2313
b773359c 2314 if(t_protobufServers) {
02b47f43 2315 try {
02b47f43 2316
845cbf4c 2317 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
0a6a45c8 2318 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId, dc->d_deviceName);
b790ef3d 2319 }
02b47f43
RG
2320 }
2321 catch(std::exception& e) {
2322 if(g_logCommonErrors)
e6a9dde5 2323 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
2324 }
2325 }
aa7929a3 2326#endif
5034517a
RG
2327 if(t_pdl) {
2328 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2329 if(!g_quiet)
2330 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
2331 g_stats.policyDrops++;
2332 return;
2333 }
2334 }
2335
879b3f70 2336 if(dc->d_mdp.d_header.qr) {
048f5db6 2337 g_stats.ignoredCount++;
c0f9be19
RG
2338 if(g_logCommonErrors) {
2339 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2340 }
4957a608 2341 return;
879b3f70 2342 }
3abcdab2 2343 if(dc->d_mdp.d_header.opcode) {
048f5db6 2344 g_stats.ignoredCount++;
c0f9be19
RG
2345 if(g_logCommonErrors) {
2346 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2347 }
c0f9be19
RG
2348 return;
2349 }
2350 else if (dh->qdcount == 0) {
2351 g_stats.emptyQueriesCount++;
2352 if(g_logCommonErrors) {
2353 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2354 }
3abcdab2
PD
2355 return;
2356 }
09e6702a 2357 else {
4957a608
BH
2358 ++g_stats.qcounter;
2359 ++g_stats.tcpqcounter;
87ff2287 2360 ++conn->d_requestsInFlight;
c51c551e 2361 if (conn->d_requestsInFlight >= TCPConnection::s_maxInFlight) {
87ff2287
OM
2362 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
2363 } else {
2364 Utility::gettimeofday(&g_now, 0); // needed?
2365 struct timeval ttd = g_now;
2366 t_fdm->setReadTTD(fd, ttd, g_tcpTimeout);
2367 }
d5c6ec95 2368 MT->makeThread(startDoResolve, dc.release()); // deletes dc
4957a608 2369 return;
09e6702a
BH
2370 }
2371 }
2372 }
2373}
2374
5216ddcc
RG
2375static bool expectProxyProtocol(const ComboAddress& from)
2376{
2377 return g_proxyProtocolACL.match(from);
2378}
2379
6dcd28c3 2380//! Handle new incoming TCP connection
d187038c 2381static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2382{
37d3f960 2383 ComboAddress addr;
09e6702a 2384 socklen_t addrlen=sizeof(addr);
a683e8bd 2385 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2386 if(newsock>=0) {
85c32340
BH
2387 if(MT->numProcesses() > g_maxMThreads) {
2388 g_stats.overCapacityDrops++;
a7b68ae7
RG
2389 try {
2390 closesocket(newsock);
2391 }
2392 catch(const PDNSException& e) {
e6a9dde5 2393 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2394 }
85c32340
BH
2395 return;
2396 }
2397
38d8b937 2398 if(t_remotes) {
92011b8f 2399 t_remotes->push_back(addr);
38d8b937
RG
2400 }
2401
2402 bool fromProxyProtocolSource = expectProxyProtocol(addr);
2403 if(t_allowFrom && !t_allowFrom->match(&addr) && !fromProxyProtocolSource) {
3ddb9247 2404 if(!g_quiet)
38d8b937 2405 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address neither matched by allow-from nor proxy-protocol-from"<<endl;
2914b022 2406
09e6702a 2407 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2408 try {
2409 closesocket(newsock);
2410 }
2411 catch(const PDNSException& e) {
e6a9dde5 2412 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2413 }
09e6702a
BH
2414 return;
2415 }
38d8b937 2416
bd0289fc 2417 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2418 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2419 try {
2420 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2421 }
2422 catch(const PDNSException& e) {
e6a9dde5 2423 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2424 }
09e6702a
BH
2425 return;
2426 }
3ddb9247 2427
3897b9e1 2428 setNonBlocking(newsock);
f26bf547 2429 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
5216ddcc
RG
2430 tc->d_source = addr;
2431 tc->d_destination.reset();
2432 tc->d_destination.sin4.sin_family = addr.sin4.sin_family;
2433 socklen_t len = tc->d_destination.getSocklen();
2434 getsockname(tc->getFD(), reinterpret_cast<sockaddr*>(&tc->d_destination), &len); // if this fails, we're ok with it
2435
38d8b937 2436 if (fromProxyProtocolSource) {
5216ddcc
RG
2437 tc->proxyProtocolNeed = s_proxyProtocolMinimumHeaderSize;
2438 tc->data.resize(tc->proxyProtocolNeed);
2439 tc->state = TCPConnection::PROXYPROTOCOLHEADER;
2440 }
2441 else {
2442 tc->state = TCPConnection::BYTE0;
2443 }
3ddb9247 2444
27ae2e3c
RG
2445 struct timeval ttd;
2446 Utility::gettimeofday(&ttd, 0);
2447 ttd.tv_sec += g_tcpTimeout;
c038218b 2448
27ae2e3c 2449 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
09e6702a
BH
2450 }
2451}
3ddb9247 2452
// Process a single UDP query that was already accepted by the caller.
//
// - question: the raw DNS query
// - fromaddr / destaddr: the reply is sent to fromaddr; destaddr is used as the source
//   address of the reply for sockets listed in g_fromtosockets
// - source / destination: the addresses used for logging, the Lua hooks and the policy
//   filter; getQNameAndSubnet() may overwrite them when fromaddr matches g_XPFAcl
// - tv: timestamp of the query; a zero tv_sec disables the "too old" check below
// - fd: the socket the reply is written to
// - proxyProtocolValues: handed to the gettag hooks and moved into the DNSComboWriter
//   when the query is not answered from the packet cache
//
// Every return statement yields a null pointer. The query is either dropped, answered
// directly from the packet cache, or handed to startDoResolve() in a new MTasker thread.
5216ddcc 2453static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, ComboAddress source, ComboAddress destination, struct timeval tv, int fd, std::vector<ProxyProtocolValue>& proxyProtocolValues)
1bc3c142 2454{
183eb877 2455 gettimeofday(&g_now, 0);
c29d820c
RG
// drop queries that have been waiting for more than 1000 ms according to tv
2456 if (tv.tv_sec) {
2457 struct timeval diff = g_now - tv;
2458 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2459
c29d820c
RG
2460 if(delta > 1000.0) {
2461 g_stats.tooOldDrops++;
2462 return nullptr;
2463 }
b71b60ee 2464 }
2465
1bc3c142 2466 ++g_stats.qcounter;
d7f10541
BH
2467 if(fromaddr.sin4.sin_family==AF_INET6)
2468 g_stats.ipv6qcounter++;
1bc3c142
BH
2469
2470 string response;
93f0da94 2471 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2472 unsigned int ctag=0;
f57486f1 2473 uint32_t qhash = 0;
12aff2e5 2474 bool needECS = false;
5cc8371b 2475 bool needXPF = g_XPFAcl.match(fromaddr);
b502d522 2476 std::unordered_set<std::string> policyTags;
5fd2577f 2477 LuaContext::LuaObject data;
67e31ebe 2478 string requestorId;
590388d2 2479 string deviceId;
0a6a45c8 2480 string deviceName;
163ed916 2481 string routingTag;
16bbc6e3 2482 bool logQuery = false;
406b722e 2483 bool logResponse = false;
12aff2e5 2484#ifdef HAVE_PROTOBUF
02b47f43 2485 boost::uuids::uuid uniqueId;
02b47f43 2486 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2487 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2488 uniqueId = getUniqueID();
02b47f43 2489 needECS = true;
63341e8d 2490 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2491 uniqueId = getUniqueID();
02b47f43 2492 }
b773359c 2493 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
406b722e 2494 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2495#endif
2496#ifdef HAVE_FSTRM
2497 checkFrameStreamExport(luaconfsLocal);
12aff2e5 2498#endif
b40562da
RG
2499 EDNSSubnetOpts ednssubnet;
2500 bool ecsFound = false;
2501 bool ecsParsed = false;
08b02366
RG
2502 uint16_t ecsBegin = 0;
2503 uint16_t ecsEnd = 0;
37a919d4
RG
2504 std::vector<DNSRecord> records;
2505 boost::optional<int> rcode = boost::none;
70fb28d9
RG
2506 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2507 bool variable = false;
37a919d4 2508 bool followCNAMEs = false;
1bc3c142 2509 try {
02b47f43
RG
2510 DNSName qname;
2511 uint16_t qtype=0;
2512 uint16_t qclass=0;
1bc3c142 2513 uint32_t age;
c15ff3df 2514 bool qnameParsed=false;
8f7473d7 2515#ifdef MALLOC_TRACE
2516 /*
2517 static uint64_t last=0;
2518 if(!last)
2519 g_mtracer->clearAllocators();
2520 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2521 last=g_mtracer->getAllocs();
2522 cout<<g_mtracer->topAllocatorsString()<<endl;
2523 g_mtracer->clearAllocators();
2524 */
2525#endif
55a1378f 2526
// the query only has to be parsed up front when ECS or XPF data is needed, or when a
// gettag / gettag_ffi Lua hook is present; a failure here is logged and ctag stays 0
70fb28d9 2527 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2528 try {
29e6303a 2529 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2530 bool xpfFound = false;
2531
2532 ecsFound = false;
2533
2534 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2535 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2536 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2537
c15ff3df
RG
2538 qnameParsed = true;
2539 ecsParsed = true;
12aff2e5 2540
70fb28d9 2541 if(t_pdl) {
12aff2e5 2542 try {
// gettag_ffi takes precedence over gettag when both are defined
70fb28d9 2543 if (t_pdl->d_gettag_ffi) {
163ed916 2544 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, records, data, ednsOptions, false, proxyProtocolValues, requestorId, deviceId, deviceName, routingTag, rcode, ttlCap, variable, logQuery, logResponse, followCNAMEs);
70fb28d9
RG
2545 }
2546 else if (t_pdl->d_gettag) {
163ed916 2547 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName, routingTag, proxyProtocolValues);
70fb28d9 2548 }
12aff2e5 2549 }
70fb28d9 2550 catch(const std::exception& e) {
12aff2e5 2551 if(g_logCommonErrors)
e6a9dde5 2552 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2553 }
8ea8c302 2554 }
b2eacd67 2555 }
70fb28d9 2556 catch(const std::exception& e)
b2eacd67 2557 {
2558 if(g_logCommonErrors)
e6a9dde5 2559 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2560 }
12ce523e 2561 }
3ddb9247 2562
02b47f43 2563 bool cacheHit = false;
1fbc6dc5 2564 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2565#ifdef HAVE_PROTOBUF
b773359c 2566 if (t_protobufServers) {
d362f7c1 2567 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2568 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2569 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
0a6a45c8 2570 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName);
b790ef3d 2571 }
d9d3f9c1
RG
2572 }
2573#endif /* HAVE_PROTOBUF */
02b47f43 2574
70fb28d9
RG
2575 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2576 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2577 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2578 vState valState;
// when the qname was not parsed above, qtype and qclass are passed by pointer instead of by value
c15ff3df 2579 if (qnameParsed) {
08b02366 2580 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2581 }
2582 else {
08b02366 2583 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2584 }
2585
// packet cache hit: the cached response is aged and sent from here, no resolving thread is started
d9d3f9c1 2586 if (cacheHit) {
8467ec26
KM
2587 if(valState == Bogus) {
2588 if(t_bogusremotes)
2589 t_bogusremotes->push_back(source);
2590 if(t_bogusqueryring)
2591 t_bogusqueryring->push_back(make_pair(qname, qtype));
2592 }
2593
d9d3f9c1 2594#ifdef HAVE_PROTOBUF
b773359c 2595 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2596 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
5d2e9a83
RG
2597 ComboAddress requestor = requestorNM.getMaskedNetwork();
2598 requestor.setPort(source.getPort());
d362f7c1 2599 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
d14121a8 2600 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
c29d820c
RG
2601 if (g_useKernelTimestamp && tv.tv_sec) {
2602 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2603 }
2604 else {
2605 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2606 }
d362f7c1
RG
2607 pbMessage->setRequestorId(requestorId);
2608 pbMessage->setDeviceId(deviceId);
0a6a45c8 2609 pbMessage->setDeviceName(deviceName);
b773359c 2610 protobufLogResponse(*pbMessage);
02b47f43 2611 }
d9d3f9c1 2612#endif /* HAVE_PROTOBUF */
49a3500d 2613 if(!g_quiet)
e6a9dde5 2614 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2615
1bc3c142
BH
2616 g_stats.packetCacheHits++;
2617 SyncRes::s_queries++;
2618 ageDNSPacket(response, age);
b71b60ee 2619 struct msghdr msgh;
2620 struct iovec iov;
7bec330a
OM
2621 cmsgbuf_aligned cbuf;
2622 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2623 msgh.msg_control=NULL;
2624
// a source address is only attached to the reply for sockets listed in g_fromtosockets
cbc03320 2625 if(g_fromtosockets.count(fd)) {
7bec330a 2626 addCMsgSrcAddr(&msgh, &cbuf, &destaddr, 0);
b71b60ee 2627 }
a2a81d42
OM
2628 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors) {
2629 int err = errno;
2630 g_log << Logger::Warning << "Sending UDP reply to client " << source.toStringWithPort()
2631 << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " failed with: "
2632 << strerror(err) << endl;
2633 }
97bee66d 2634 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2635 struct dnsheader tmpdh;
2636 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2637 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2638 }
08f3f638 2639 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2640 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2641 return 0;
2642 }
3ddb9247 2643 }
1bc3c142 2644 catch(std::exception& e) {
4b4566e8
RG
2645 if(g_logCommonErrors)
2646 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2647 return 0;
2648 }
3ddb9247 2649
// not answered from the packet cache: apply the Lua ipfilter and the capacity limit
// before a resolving thread is created
f26bf547 2650 if(t_pdl) {
5cc8371b 2651 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2652 if(!g_quiet)
e6a9dde5 2653 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2654 g_stats.policyDrops++;
2655 return 0;
2656 }
2657 }
2658
1bc3c142 2659 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2660 if(!g_quiet)
e6a9dde5 2661 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2662
1bc3c142
BH
2663 g_stats.overCapacityDrops++;
2664 return 0;
2665 }
3ddb9247 2666
// policyTags, data and records are moved into the DNSComboWriter and must not be used afterwards
37a919d4 2667 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data), std::move(records)));
1bc3c142 2668 dc->setSocket(fd);
49a3500d 2669 dc->d_tag=ctag;
e9f63d47 2670 dc->d_qhash=qhash;
5cc8371b
RG
2671 dc->setRemote(fromaddr);
2672 dc->setSource(source);
b71b60ee 2673 dc->setLocal(destaddr);
5cc8371b 2674 dc->setDestination(destination);
1bc3c142 2675 dc->d_tcp=false;
b40562da
RG
2676 dc->d_ecsFound = ecsFound;
2677 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2678 dc->d_ecsBegin = ecsBegin;
2679 dc->d_ecsEnd = ecsEnd;
b40562da 2680 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2681 dc->d_ttlCap = ttlCap;
2682 dc->d_variable = variable;
37a919d4
RG
2683 dc->d_followCNAMERecords = followCNAMEs;
2684 dc->d_rcode = rcode;
406b722e 2685 dc->d_logResponse = logResponse;
aa7929a3 2686#ifdef HAVE_PROTOBUF
b773359c 2687 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2688 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2689 }
67e31ebe 2690 dc->d_requestorId = requestorId;
590388d2 2691 dc->d_deviceId = deviceId;
0a6a45c8 2692 dc->d_deviceName = deviceName;
c29d820c 2693 dc->d_kernelTimestamp = tv;
aa7929a3 2694#endif
// the caller's proxyProtocolValues vector is moved from here
5216ddcc 2695 dc->d_proxyProtocolValues = std::move(proxyProtocolValues);
1b54dc37 2696 dc->d_routingTag = std::move(routingTag);
aa7929a3 2697
// the unique_ptr gives up ownership; per the original note, startDoResolve deletes dc
9a864da4 2698 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2699 return 0;
3ddb9247
PD
2700}
2701
b71b60ee 2702
d187038c 2703static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2704{
a683e8bd 2705 ssize_t len;
5216ddcc 2706 static const size_t maxIncomingQuerySize = g_proxyProtocolACL.empty() ? 512 : (512 + g_proxyProtocolMaximumSize);
04896b99 2707 static thread_local std::string data;
5db529f8 2708 ComboAddress fromaddr;
5216ddcc
RG
2709 ComboAddress source;
2710 ComboAddress destination;
b71b60ee 2711 struct msghdr msgh;
2712 struct iovec iov;
7bec330a 2713 cmsgbuf_aligned cbuf;
390f1dab 2714 bool firstQuery = true;
5216ddcc 2715 std::vector<ProxyProtocolValue> proxyProtocolValues;
b71b60ee 2716
c0a00acd 2717 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
5216ddcc 2718 bool proxyProto = false;
c0a00acd
RG
2719 data.resize(maxIncomingQuerySize);
2720 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
7bec330a 2721 fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2722
c0a00acd 2723 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2724
c0a00acd 2725 firstQuery = false;
6b8829d5
RG
2726
2727 if (msgh.msg_flags & MSG_TRUNC) {
2728 g_stats.truncatedDrops++;
2729 if (!g_quiet) {
2730 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2731 }
2732 return;
2733 }
2734
5216ddcc
RG
2735 data.resize(static_cast<size_t>(len));
2736
2737 if (expectProxyProtocol(fromaddr)) {
2738 bool tcp;
8c73c703 2739 ssize_t used = parseProxyHeader(data, proxyProto, source, destination, tcp, proxyProtocolValues);
5216ddcc
RG
2740 if (used <= 0) {
2741 ++g_stats.proxyProtocolInvalidCount;
2742 if (!g_quiet) {
95f851d6 2743 g_log<<Logger::Error<<"Ignoring invalid proxy protocol ("<<std::to_string(len)<<", "<<std::to_string(used)<<") query from "<<fromaddr.toStringWithPort()<<endl;
5216ddcc
RG
2744 }
2745 return;
2746 }
95f851d6
RG
2747 else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
2748 if (g_quiet) {
2749 g_log<<Logger::Error<<"Proxy protocol header in UDP packet from "<< fromaddr.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping"<< endl;
2750 }
2751 ++g_stats.proxyProtocolInvalidCount;
2752 return;
2753 }
2754
5216ddcc
RG
2755 data.erase(0, used);
2756 }
6b8829d5
RG
2757 else if (len > 512) {
2758 /* we only allow UDP packets larger than 512 for those with a proxy protocol header */
2759 g_stats.truncatedDrops++;
2760 if (!g_quiet) {
95f851d6 2761 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toStringWithPort()<<endl;
6b8829d5
RG
2762 }
2763 return;
5216ddcc 2764 }
390f1dab 2765
5216ddcc 2766 if (data.size() < sizeof(dnsheader)) {
c0a00acd
RG
2767 g_stats.ignoredCount++;
2768 if (!g_quiet) {
5216ddcc 2769 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(data.size())<<") query from "<<fromaddr.toString()<<endl;
c0a00acd
RG
2770 }
2771 return;
04896b99 2772 }
04896b99 2773
6b8829d5
RG
2774 if (!proxyProto) {
2775 source = fromaddr;
ba892c7f 2776 }
b23b8614 2777
c0a00acd
RG
2778 if(t_remotes) {
2779 t_remotes->push_back(fromaddr);
2780 }
81859ba5 2781
38d8b937 2782 if(t_allowFrom && !t_allowFrom->match(&source)) {
c0a00acd 2783 if(!g_quiet) {
3bdc4508 2784 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<source.toString()<<", address not matched by allow-from"<<endl;
c0a00acd 2785 }
3ddb9247 2786
c0a00acd
RG
2787 g_stats.unauthorizedUDP++;
2788 return;
5db529f8 2789 }
3bdc4508 2790
c0a00acd
RG
2791 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2792 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2793 if(!g_quiet) {
2794 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2795 }
2796
2797 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2798 return;
3abcdab2 2799 }
c0a00acd
RG
2800
2801 try {
c0a00acd
RG
2802 dnsheader* dh=(dnsheader*)&data[0];
2803
2804 if(dh->qr) {
2805 g_stats.ignoredCount++;
2806 if(g_logCommonErrors) {
2807 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2808 }
2809 }
2810 else if(dh->opcode) {
2811 g_stats.ignoredCount++;
2812 if(g_logCommonErrors) {
2813 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2814 }
a6147cd2 2815 }
c0f9be19
RG
2816 else if (dh->qdcount == 0) {
2817 g_stats.emptyQueriesCount++;
2818 if(g_logCommonErrors) {
2819 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2820 }
2821 }
a6147cd2 2822 else {
c0a00acd
RG
2823 struct timeval tv={0,0};
2824 HarvestTimestamp(&msgh, &tv);
2825 ComboAddress dest;
2826 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2827 auto loc = rplookup(g_listenSocketsAddresses, fd);
2828 if(HarvestDestinationAddress(&msgh, &dest)) {
2829 // but.. need to get port too
2830 if(loc) {
2831 dest.sin4.sin_port = loc->sin4.sin_port;
2832 }
a6147cd2 2833 }
2834 else {
c0a00acd
RG
2835 if(loc) {
2836 dest = *loc;
2837 }
2838 else {
2839 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2840 socklen_t slen = dest.getSocklen();
2841 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2842 }
2843 }
5216ddcc
RG
2844 if (!proxyProto) {
2845 destination = dest;
2846 }
c0a00acd
RG
2847
2848 if(g_weDistributeQueries) {
5216ddcc 2849 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, source, destination, tv, fd, proxyProtocolValues));
c0a00acd
RG
2850 }
2851 else {
144040be 2852 ++s_threadInfos[t_id].numberOfDistributedQueries;
5216ddcc 2853 doProcessUDPQuestion(data, fromaddr, dest, source, destination, tv, fd, proxyProtocolValues);
a6147cd2 2854 }
2855 }
c0a00acd 2856 }
16ce7f18 2857 catch(const MOADNSException &mde) {
c0a00acd
RG
2858 g_stats.clientParseError++;
2859 if(g_logCommonErrors) {
2860 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2861 }
2862 }
2863 catch(const std::runtime_error& e) {
2864 g_stats.clientParseError++;
2865 if(g_logCommonErrors) {
2866 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2867 }
5db529f8
BH
2868 }
2869 }
c0a00acd
RG
2870 else {
2871 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2872 if(firstQuery && errno == EAGAIN) {
2873 g_stats.noPacketError++;
2874 }
390f1dab 2875
c0a00acd
RG
2876 break;
2877 }
ac0e821b 2878 }
5db529f8
BH
2879}
2880
adb6cd72 2881static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2882{
37d3f960 2883 int fd;
f28307ad 2884 vector<string>locals;
2e3d8a19 2885 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2886
f28307ad 2887 if(locals.empty())
3f81d239 2888 throw PDNSException("No local address specified");
3ddb9247 2889
f28307ad 2890 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2891 ServiceTuple st;
2892 st.port=::arg().asNum("local-port");
2893 parseService(*i, st);
3ddb9247 2894
32252594
BH
2895 ComboAddress sin;
2896
d38e2ba9 2897 sin.reset();
37d3f960 2898 sin.sin4.sin_family = AF_INET;
32252594 2899 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2900 sin.sin6.sin6_family = AF_INET6;
f71bc087 2901 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2902 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2903 }
2904
2905 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2906 if(fd<0)
3f81d239 2907 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2908
3897b9e1 2909 setCloseOnExec(fd);
a903b39c 2910
f28307ad 2911 int tmp=1;
810ff705 2912 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2913 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2914 exit(1);
f28307ad 2915 }
0dfa94ab 2916 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
a2a81d42
OM
2917 int err = errno;
2918 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
0dfa94ab 2919 }
2920
c8ddb7c2 2921#ifdef TCP_DEFER_ACCEPT
38ac0821 2922 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2923 if(i==locals.begin())
377602e3 2924 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2925 }
2926#endif
2927
fec7dd5a
SS
2928 if( ::arg().mustDo("non-local-bind") )
2929 Utility::setBindAny(AF_INET, fd);
2930
2332f42d 2931#ifdef SO_REUSEPORT
810ff705
RG
2932 if(g_reusePort) {
2933 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2934 throw PDNSException("SO_REUSEPORT: "+stringerror());
2935 }
2936#endif
2937
0735b17e
RG
2938 if (::arg().asNum("tcp-fast-open") > 0) {
2939#ifdef TCP_FASTOPEN
2940 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2941 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
a2a81d42
OM
2942 int err = errno;
2943 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(err)<<endl;
0735b17e
RG
2944 }
2945#else
e6a9dde5 2946 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2947#endif
2948 }
2949
32252594 2950 sin.sin4.sin_port = htons(st.port);
a683e8bd 2951 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2952 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2953 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2954
3897b9e1 2955 setNonBlocking(fd);
49a699c4 2956 setSocketSendBuffer(fd, 65000);
37d3f960 2957 listen(fd, 128);
b243ca3b 2958 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2959 tcpSockets.insert(fd);
2960
84433b79 2961 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2962 // - fd is not that which we know here, but returned from accept()
3ddb9247 2963 if(sin.sin4.sin_family == AF_INET)
377602e3 2964 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2965 else
377602e3 2966 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2967 }
9c495589
BH
2968}
2969
b243ca3b 2970static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2971{
fec7dd5a 2972 int one=1;
f28307ad 2973 vector<string>locals;
2e3d8a19 2974 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2975
f28307ad 2976 if(locals.empty())
3f81d239 2977 throw PDNSException("No local address specified");
3ddb9247 2978
f28307ad 2979 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2980 ServiceTuple st;
2981 st.port=::arg().asNum("local-port");
2982 parseService(*i, st);
2983
37d3f960 2984 ComboAddress sin;
996c89cc 2985
d38e2ba9 2986 sin.reset();
37d3f960 2987 sin.sin4.sin_family = AF_INET;
32252594 2988 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2989 sin.sin6.sin6_family = AF_INET6;
f71bc087 2990 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2991 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2992 }
3ddb9247 2993
bb4bdbaf 2994 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2995 if(fd < 0) {
a2a81d42 2996 throw PDNSException("Making a UDP server socket for resolver: "+stringerror());
d3b4137e 2997 }
915b0c39 2998 if (!setSocketTimestamps(fd))
e6a9dde5 2999 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 3000
b71b60ee 3001 if(IsAnyAddress(sin)) {
cbc03320 3002 if(sin.sin4.sin_family == AF_INET)
3003 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
3004 g_fromtosockets.insert(fd);
757d3179 3005#ifdef IPV6_RECVPKTINFO
cbc03320 3006 if(sin.sin4.sin_family == AF_INET6)
3007 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
3008 g_fromtosockets.insert(fd);
757d3179 3009#endif
0dfa94ab 3010 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
a2a81d42
OM
3011 int err = errno;
3012 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
0dfa94ab 3013 }
b71b60ee 3014 }
fec7dd5a
SS
3015 if( ::arg().mustDo("non-local-bind") )
3016 Utility::setBindAny(AF_INET6, fd);
3017
3897b9e1 3018 setCloseOnExec(fd);
a903b39c 3019
4e9a20e6 3020 setSocketReceiveBuffer(fd, 250000);
32252594 3021 sin.sin4.sin_port = htons(st.port);
37d3f960 3022
2332f42d 3023
2573d4a6 3024#ifdef SO_REUSEPORT
810ff705 3025 if(g_reusePort) {
2332f42d 3026 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
3027 throw PDNSException("SO_REUSEPORT: "+stringerror());
3028 }
3029#endif
90f9fbc0
RG
3030
3031 if (sin.isIPv4()) {
3032 try {
3033 setSocketIgnorePMTU(fd);
3034 }
3035 catch(const std::exception& e) {
3036 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
3037 }
3038 }
3039
3040 socklen_t socklen=sin.getSocklen();
3ddb9247 3041 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 3042 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 3043
3897b9e1 3044 setNonBlocking(fd);
c2136bf0 3045
b243ca3b 3046 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 3047 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 3048 if(sin.sin4.sin_family == AF_INET)
377602e3 3049 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 3050 else
377602e3 3051 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 3052 }
c836dc19 3053}
caa6eefa 3054
d187038c 3055static void daemonize(void)
c836dc19
BH
3056{
3057 if(fork())
3058 exit(0); // bye bye
3ddb9247
PD
3059
3060 setsid();
c836dc19 3061
27a5ead5 3062 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 3063 if(i < 0)
e6a9dde5 3064 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
3065 else {
3066 dup2(i,0); /* stdin */
3067 dup2(i,1); /* stderr */
3068 dup2(i,2); /* stderr */
3069 close(i);
3070 }
288f4aa9 3071}
caa6eefa 3072
9f374187
FL
3073static void termIntHandler(int)
3074{
cd180a71 3075 doExit();
9f374187
FL
3076}
3077
d187038c 3078static void usr1Handler(int)
c75a6a9e
BH
3079{
3080 statsWanted=true;
3081}
ae1b2e98 3082
d187038c 3083static void usr2Handler(int)
9170fbaf 3084{
f1f34cc2 3085 g_quiet= !g_quiet;
3086 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
3087 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
3088}
3089
d187038c 3090static void doStats(void)
c75a6a9e 3091{
16beeaa4
BH
3092 static time_t lastOutputTime;
3093 static uint64_t lastQueryCount;
d299d4f5 3094
cdde2458
OM
3095 uint64_t cacheHits = s_RC->cacheHits;
3096 uint64_t cacheMisses = s_RC->cacheMisses;
3097 uint64_t cacheSize = s_RC->size();
7ce9aad6
OM
3098 auto rc_stats = s_RC->stats();
3099 double r = rc_stats.second == 0 ? 0.0 : (100.0 * rc_stats.first / rc_stats.second);
3100
d299d4f5 3101 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 3102 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
cdde2458 3103 cacheSize << " cache entries, "<<
3427fa8a 3104 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247 3105 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
7ce9aad6 3106 g_log << Logger::Notice<< "stats: cache contended/acquired " << rc_stats.first << '/' << rc_stats.second << " = " << r << '%' << endl;
3ddb9247 3107
e6a9dde5 3108 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 3109 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
60e5208a 3110 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<", failed ns: "
bbc7101c
OM
3111 << broadcastAccFunction<uint64_t>(pleaseGetFailedServersSize)<<", ednsmap: "
3112 <<broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize)<<endl;
e6a9dde5
PL
3113 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
3114 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 3115 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 3116 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 3117 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 3118
e6a9dde5 3119 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 3120 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 3121
e6a9dde5 3122 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 3123 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 3124
144040be
RG
3125 size_t idx = 0;
3126 for (const auto& threadInfo : s_threadInfos) {
3127 if(threadInfo.isWorker) {
ad9fc3dc 3128 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
144040be
RG
3129 ++idx;
3130 }
3131 }
3132
16beeaa4
BH
3133 time_t now = time(0);
3134 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 3135 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
3136 }
3137 lastOutputTime = now;
3138 lastQueryCount = SyncRes::s_queries;
c75a6a9e 3139 }
3ddb9247 3140 else if(statsWanted)
e6a9dde5 3141 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 3142
c75a6a9e
BH
3143 statsWanted=false;
3144}
c836dc19 3145
29f0b1ce 3146static void houseKeeping(void *)
c836dc19 3147{
cdde2458
OM
3148 static thread_local time_t last_rootupdate, last_secpoll, last_trustAnchorUpdate{0}, last_RC_prune;
3149 static thread_local struct timeval last_prune;
3150
3337c2f7
RG
3151 static thread_local int cleanCounter=0;
3152 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
3153 auto luaconfsLocal = g_luaconfs.getLocal();
3154
3155 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
3156 // Loading the Lua config file already "refreshed" the TAs
3157 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
3158 }
3159
cc59bce6 3160 try {
6b0d90ea 3161 if(s_running) {
cc59bce6 3162 return;
6b0d90ea 3163 }
cc59bce6 3164 s_running=true;
3ddb9247 3165
b9715061
OM
3166 struct timeval now, past;
3167 Utility::gettimeofday(&now, nullptr);
3168 past = now;
3169 past.tv_sec -= 5;
3170 if (last_prune < past) {
a6f7f5fe 3171 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
a6f7f5fe 3172 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 3173
b9715061 3174 time_t limit;
cc59bce6 3175 if(!((cleanCounter++)%40)) { // this is a full scan!
b9715061 3176 limit=now.tv_sec-300;
a712cb56 3177 SyncRes::pruneNSSpeeds(limit);
cc59bce6 3178 }
b9715061
OM
3179 limit = now.tv_sec - SyncRes::s_serverdownthrottletime * 10;
3180 SyncRes::pruneFailedServers(limit);
3181 limit = now.tv_sec - 2*3600;
3182 SyncRes::pruneEDNSStatuses(limit);
3183 SyncRes::pruneThrottledServers();
3184 Utility::gettimeofday(&last_prune, nullptr);
d67620e4 3185 }
3ddb9247 3186
b243ca3b 3187 if(isHandlerThread()) {
cdde2458
OM
3188 if (now.tv_sec - last_RC_prune > 5) {
3189 s_RC->doPrune(g_maxCacheEntries);
3190 last_RC_prune = now.tv_sec;
3191 }
3192 // XXX !!! global
3193 if(now.tv_sec - last_rootupdate > 7200) {
3194 int res = SyncRes::getRootNS(g_now, nullptr);
3195 if (!res) {
3196 last_rootupdate=now.tv_sec;
3197 primeRootNSZones(g_dnssecmode != DNSSECMode::Off);
3198 }
3199 }
3ddb9247 3200
cc59bce6 3201 if(now.tv_sec - last_secpoll >= 3600) {
3202 try {
3203 doSecPoll(&last_secpoll);
3204 }
124dd1d4 3205 catch(const std::exception& e)
581d4ea3 3206 {
e6a9dde5 3207 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 3208 }
124dd1d4 3209 catch(const PDNSException& e)
47e9b74f 3210 {
e6a9dde5 3211 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 3212 }
124dd1d4 3213 catch(const ImmediateServFailException &e)
d0992a65 3214 {
e6a9dde5 3215 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 3216 }
124dd1d4
RG
3217 catch(const PolicyHitException& e) {
3218 g_log<<Logger::Error<<"Policy hit while performing security poll"<<endl;
3219 }
47e9b74f 3220 catch(...)
3221 {
e6a9dde5 3222 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 3223 }
18b73338 3224 }
e4ae55e5
PL
3225
3226 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
3227 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
3228 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
3229 try {
3230 map<DNSName, dsmap_t> dsAnchors;
3231 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
3232 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
3233 lci.dsAnchors = dsAnchors;
3234 });
3235 }
3236 last_trustAnchorUpdate = now.tv_sec;
3237 } catch (const PDNSException &pe) {
3238 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
3239 }
3240 }
d67620e4 3241 }
6b0d90ea 3242 s_running=false;
d67620e4 3243 }
cc59bce6 3244 catch(PDNSException& ae)
3245 {
3246 s_running=false;
e6a9dde5 3247 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 3248 throw;
3249 }
779828c4 3250}
d6d5dea7 3251
d187038c 3252static void makeThreadPipes()
49a699c4 3253{
ee271fc4
RG
3254 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
3255 if (pipeBufferSize > 0) {
3256 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
3257 }
3258
b243ca3b
RG
3259 /* thread 0 is the handler / SNMP, we start at 1 */
3260 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
3261 auto& threadInfos = s_threadInfos.at(n);
3262
49a699c4
BH
3263 int fd[2];
3264 if(pipe(fd) < 0)
3265 unixDie("Creating pipe for inter-thread communications");
3ddb9247 3266
b243ca3b
RG
3267 threadInfos.pipes.readToThread = fd[0];
3268 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 3269
49a699c4
BH
3270 if(pipe(fd) < 0)
3271 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
3272
3273 threadInfos.pipes.readFromThread = fd[0];
3274 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 3275
cf8cda18
RG
3276 if(pipe(fd) < 0)
3277 unixDie("Creating pipe for inter-thread communications");
d10307c5 3278
b243ca3b
RG
3279 threadInfos.pipes.readQueriesToThread = fd[0];
3280 threadInfos.pipes.writeQueriesToThread = fd[1];
3281
ee271fc4
RG
3282 if (pipeBufferSize > 0) {
3283 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
a2a81d42
OM
3284 int err = errno;
3285 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(err)<<endl;
ee271fc4
RG
3286 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
3287 if (existingSize > 0) {
3288 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
3289 }
3290 }
3291 }
3292
b243ca3b 3293 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
3294 unixDie("Making pipe for inter-thread communications non-blocking");
3295 }
49a699c4
BH
3296 }
3297}
3298
00c9b8c1
BH
3299struct ThreadMSG
3300{
3301 pipefunc_t func;
3302 bool wantAnswer;
3303};
3304
b4e76a18 3305void broadcastFunction(const pipefunc_t& func)
49a699c4 3306{
b243ca3b
RG
3307 /* This function might be called by the worker with t_id 0 during startup
3308 for the initialization of ACLs and domain maps. After that it should only
3309 be called by the handler. */
d77abca1 3310
b243ca3b
RG
3311 if (s_threadInfos.empty() && isHandlerThread()) {
3312 /* the handler and distributors will call themselves below, but
3313 during startup we get called while s_threadInfos has not been
3314 populated yet to update the ACL or domain maps, so we need to
3315 handle that case.
3316 */
3317 func();
3318 }
b4e76a18 3319
b243ca3b
RG
3320 unsigned int n = 0;
3321 for (const auto& threadInfo : s_threadInfos) {
49a699c4 3322 if(n++ == t_id) {
b4e76a18 3323 func(); // don't write to ourselves!
49a699c4
BH
3324 continue;
3325 }
3ddb9247 3326
00c9b8c1
BH
3327 ThreadMSG* tmsg = new ThreadMSG();
3328 tmsg->func = func;
3329 tmsg->wantAnswer = true;
b243ca3b 3330 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 3331 delete tmsg;
b243ca3b 3332
49a699c4 3333 unixDie("write to thread pipe returned wrong size or error");
b841314c 3334 }
3ddb9247 3335
49467864 3336 string* resp = nullptr;
b243ca3b 3337 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 3338 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3339
49a699c4 3340 if(resp) {
49a699c4 3341 delete resp;
49467864 3342 resp = nullptr;
49a699c4
BH
3343 }
3344 }
3345}
06ea9015 3346
592d7ade 3347static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 3348{
144040be 3349 auto& targetInfo = s_threadInfos[target];
b243ca3b
RG
3350 if(!targetInfo.isWorker) {
3351 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 3352 exit(1);
00c9b8c1 3353 }
d77abca1 3354
b243ca3b 3355 const auto& tps = targetInfo.pipes;
3ddb9247 3356
cf8cda18
RG
3357 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
3358 if (written > 0) {
3359 if (static_cast<size_t>(written) != sizeof(tmsg)) {
3360 delete tmsg;
3361 unixDie("write to thread pipe returned wrong size or error");
3362 }
3363 }
3364 else {
3365 int error = errno;
cf8cda18 3366 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 3367 return false;
cf8cda18 3368 } else {
592d7ade 3369 delete tmsg;
17634427 3370 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 3371 }
b841314c 3372 }
592d7ade 3373
144040be
RG
3374 ++targetInfo.numberOfDistributedQueries;
3375
592d7ade
RG
3376 return true;
3377}
3378
144040be
RG
3379static unsigned int getWorkerLoad(size_t workerIdx)
3380{
3381 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
3382 if (mt != nullptr) {
3383 return mt->numProcesses();
3384 }
3385 return 0;
3386}
3387
3388static unsigned int selectWorker(unsigned int hash)
3389{
3390 if (s_balancingFactor == 0) {
3391 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
3392 }
3393
3394 /* we start with one, representing the query we are currently handling */
3395 double currentLoad = 1;
3396 std::vector<unsigned int> load(g_numWorkerThreads);
3397 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3398 load[idx] = getWorkerLoad(idx);
3399 currentLoad += load[idx];
3400 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3401 }
3402
3403 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3404 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3405
3406 unsigned int worker = hash % g_numWorkerThreads;
1b9d2d46 3407 /* at least one server has to be at or below the average load */
596bf482
RG
3408 if (load[worker] > targetLoad) {
3409 ++g_stats.rebalancedQueries;
3410 do {
3411 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3412 worker = (worker + 1) % g_numWorkerThreads;
3413 }
3414 while(load[worker] > targetLoad);
144040be
RG
3415 }
3416
3417 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3418}
3419
592d7ade
RG
3420// This function is only called by the distributor threads, when pdns-distributes-queries is set
3421void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3422{
3423 if (!isDistributorThread()) {
3424 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3425 exit(1);
3426 }
3427
3428 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
144040be 3429 unsigned int target = selectWorker(hash);
592d7ade
RG
3430
3431 ThreadMSG* tmsg = new ThreadMSG();
3432 tmsg->func = func;
3433 tmsg->wantAnswer = false;
3434
3435 if (!trySendingQueryToWorker(target, tmsg)) {
3436 /* if this function failed but did not raise an exception, it means that the pipe
3437 was full, let's try another one */
3438 unsigned int newTarget = 0;
3439 do {
3440 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3441 } while (newTarget == target);
3442
3443 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3444 g_stats.queryPipeFullDrops++;
3445 delete tmsg;
3446 }
3447 }
00c9b8c1 3448}
3427fa8a 3449
d187038c 3450static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 3451{
f26bf547 3452 ThreadMSG* tmsg = nullptr;
3ddb9247 3453
cf8cda18 3454 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
3455 unixDie("read from thread pipe returned wrong size or error");
3456 }
3ddb9247 3457
2f22827a 3458 void *resp=0;
3459 try {
3460 resp = tmsg->func();
3461 }
3462 catch(std::exception& e) {
6d2010a8 3463 if(g_logCommonErrors)
e6a9dde5 3464 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3465 }
3466 catch(PDNSException& e) {
6d2010a8 3467 if(g_logCommonErrors)
e6a9dde5 3468 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3469 }
d7c676a5 3470 if(tmsg->wantAnswer) {
b243ca3b
RG
3471 const auto& threadInfo = s_threadInfos.at(t_id);
3472 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 3473 delete tmsg;
00c9b8c1 3474 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
3475 }
3476 }
3ddb9247 3477
00c9b8c1 3478 delete tmsg;
49a699c4 3479}
09e6702a 3480
13034931
BH
3481template<class T> void *voider(const boost::function<T*()>& func)
3482{
3483 return func();
3484}
3485
050e6877 3486static vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
92011b8f 3487{
3488 a.insert(a.end(), b.begin(), b.end());
3489 return a;
3490}
3491
050e6877 3492static vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3ddb9247
PD
3493{
3494 a.insert(a.end(), b.begin(), b.end());
3495 return a;
3496}
3497
92011b8f 3498
387b9ca6
RG
3499/*
3500 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
3501 reload the Lua script (not the Lua config) or change the current trace regex,
3502 and by the SNMP thread to gather metrics. */
b4e76a18 3503template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 3504{
b243ca3b 3505 if (!isHandlerThread()) {
788eeb4c 3506 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 3507 exit(1);
d77abca1
RG
3508 }
3509
b243ca3b 3510 unsigned int n = 0;
3427fa8a 3511 T ret=T();
b243ca3b
RG
3512 for (const auto& threadInfo : s_threadInfos) {
3513 if (n++ == t_id) {
3514 continue;
3515 }
3516
3517 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
3518 ThreadMSG* tmsg = new ThreadMSG();
3519 tmsg->func = boost::bind(voider<T>, func);
3520 tmsg->wantAnswer = true;
3ddb9247 3521
b841314c
RG
3522 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3523 delete tmsg;
3427fa8a 3524 unixDie("write to thread pipe returned wrong size or error");
b841314c 3525 }
3ddb9247 3526
49467864 3527 T* resp = nullptr;
3427fa8a
BH
3528 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3529 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3530
3427fa8a 3531 if(resp) {
3427fa8a
BH
3532 ret += *resp;
3533 delete resp;
49467864 3534 resp = nullptr;
3427fa8a
BH
3535 }
3536 }
3537 return ret;
3538}
3539
b4e76a18
RG
3540template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3541template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3542template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3543template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
5ac6d761 3544template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3427fa8a 3545
d187038c 3546static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3547{
fbfc1809
RG
3548 try {
3549 string remote;
3550 string msg=s_rcc.recv(&remote);
3551 RecursorControlParser rcp;
3552 RecursorControlParser::func_t* command;
3ddb9247 3553
fbfc1809 3554 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0 3555
fbfc1809
RG
3556 // If we are inside a chroot, we need to strip
3557 if (!arg()["chroot"].empty()) {
3558 size_t len = arg()["chroot"].length();
3559 remote = remote.substr(len);
3560 }
f0f3f0b0 3561
ab5c053d
BH
3562 s_rcc.send(answer, &remote);
3563 command();
3564 }
fbfc1809 3565 catch(const std::exception& e) {
e6a9dde5 3566 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3567 }
fbfc1809 3568 catch(const PDNSException& ae) {
e6a9dde5 3569 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3570 }
09e6702a
BH
3571}
3572
d187038c 3573static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3574{
0b18b22e 3575 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 3576 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3577
667f7e60 3578 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3579
a683e8bd 3580 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3581 if(ret > 0) {
667f7e60 3582 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3583 pident->inNeeded-=(size_t)ret;
825fa717 3584 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3585 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3586 PacketID pid=*pident;
3587 string msg=pident->inMSG;
3ddb9247 3588
bb4bdbaf 3589 t_fdm->removeReadFD(fd);
3ddb9247 3590 MT->sendEvent(pid, &msg);
09e6702a
BH
3591 }
3592 else {
667f7e60 3593 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3594 }
3595 }
3596 else {
667f7e60 3597 PacketID tmp=*pident;
bb4bdbaf 3598 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3599 string empty;
3600 MT->sendEvent(tmp, &empty); // this conveys error status
3601 }
3602}
3603
d187038c 3604static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3605{
0b18b22e 3606 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3607 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3608 if(ret > 0) {
a683e8bd 3609 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3610 if(pid->outPos==pid->outMSG.size()) {
3611 PacketID tmp=*pid;
bb4bdbaf 3612 t_fdm->removeWriteFD(fd);
09e6702a
BH
3613 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3614 }
3615 }
3616 else { // error or EOF
667f7e60 3617 PacketID tmp(*pid);
bb4bdbaf 3618 t_fdm->removeWriteFD(fd);
09e6702a 3619 string sent;
998a4334 3620 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3621 }
3622}
3623
34801ab1 3624// resend event to everybody chained onto it
d187038c 3625static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3626{
3627 if(iter->key.chain.empty())
3628 return;
e27e91a8 3629 // cerr<<"doResends called!\n";
34801ab1
BH
3630 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3631 resend.fd=-1;
3632 resend.id=*i;
e27e91a8 3633 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3634
34801ab1
BH
3635 MT->sendEvent(resend, &content);
3636 g_stats.chainResends++;
34801ab1
BH
3637 }
3638}
3639
d187038c 3640static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3641{
600fc20b 3642 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3643 ssize_t len;
fae8fe07
RG
3644 std::string packet;
3645 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3646 ComboAddress fromaddr;
09e6702a
BH
3647 socklen_t addrlen=sizeof(fromaddr);
3648
fae8fe07 3649 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3650
a683e8bd 3651 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3652 if(len < 0)
996c89cc 3653 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3654 else {
3ddb9247 3655 g_stats.serverParseError++;
09e6702a 3656 if(g_logCommonErrors)
e6a9dde5 3657 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3658 ": packet smaller than DNS header"<<endl;
998a4334 3659 }
34801ab1 3660
49a699c4 3661 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3662 string empty;
3663
3664 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3665 if(iter != MT->d_waiters.end())
34801ab1 3666 doResends(iter, pid, empty);
3ddb9247 3667
34801ab1 3668 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3669 return;
3ddb9247 3670 }
998a4334 3671
fae8fe07 3672 packet.resize(len);
998a4334 3673 dnsheader dh;
fae8fe07 3674 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3675
6da3b3ad
PD
3676 PacketID pident;
3677 pident.remote=fromaddr;
3678 pident.id=dh.id;
3679 pident.fd=fd;
34801ab1 3680
33a928af 3681 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3682 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3683 }
3684
3685 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3686 !dh.qr) { // one weird server
3687 pident.domain.clear();
3688 pident.type = 0;
3689 }
3690 else {
3691 try {
0b31e67e 3692 if(len > 12)
fae8fe07 3693 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3694 }
3695 catch(std::exception& e) {
3696 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3697 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3698 return;
34801ab1 3699 }
6da3b3ad 3700 }
34801ab1 3701
6da3b3ad
PD
3702 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3703 if(iter != MT->d_waiters.end()) {
3704 doResends(iter, pident, packet);
3705 }
c1da7976 3706
6da3b3ad 3707retryWithName:
4957a608 3708
6da3b3ad 3709 if(!MT->sendEvent(pident, &packet)) {
9ec48f21
RG
3710 /* we did not find a match for this response, something is wrong */
3711
6da3b3ad
PD
3712 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3713 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3714 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3715 pident.domain == mthread->key.domain) {
6da3b3ad 3716 mthread->key.nearMisses++;
998a4334 3717 }
6da3b3ad
PD
3718
3719 // be a bit paranoid here since we're weakening our matching
3ddb9247 3720 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3721 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3722 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3723 pident.domain = mthread->key.domain;
3724 pident.type = mthread->key.type;
3725 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3726 }
09e6702a 3727 }
6da3b3ad
PD
3728 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3729 if(g_logCommonErrors) {
e6a9dde5 3730 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3731 }
09e6702a 3732 }
6da3b3ad 3733 else if(fd >= 0) {
9ec48f21 3734 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
6da3b3ad
PD
3735 t_udpclientsocks->returnSocket(fd);
3736 }
09e6702a
BH
3737}
3738
050e6877 3739static FDMultiplexer* getMultiplexer()
1f4abb20
BH
3740{
3741 FDMultiplexer* ret;
f26bf547 3742 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3743 try {
f26bf547 3744 ret=i.second();
1f4abb20
BH
3745 return ret;
3746 }
98d0ee4a 3747 catch(FDMultiplexerException &fe) {
e6a9dde5 3748 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3749 }
3750 catch(...) {
e6a9dde5 3751 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3752 }
1f4abb20 3753 }
e6a9dde5 3754 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3755 exit(1);
3756}
3757
3ddb9247 3758
d187038c 3759static string* doReloadLuaScript()
4485aa35 3760{
674cf0f6 3761 string fname= ::arg()["lua-dns-script"];
4485aa35 3762 try {
674cf0f6 3763 if(fname.empty()) {
f26bf547 3764 t_pdl.reset();
377602e3 3765 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3766 return new string("unloaded\n");
4485aa35
BH
3767 }
3768 else {
9694e14f
AT
3769 t_pdl = std::make_shared<RecursorLua4>();
3770 t_pdl->loadFile(fname);
4485aa35
BH
3771 }
3772 }
fdbf35ac 3773 catch(std::exception& e) {
e6a9dde5 3774 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3775 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3776 }
3ddb9247 3777
e6a9dde5 3778 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3779 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3780}
3781
49a699c4
BH
3782string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3783{
3ddb9247 3784 if(begin != end)
49a699c4 3785 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3786
0f39c1a3 3787 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3788}
49a699c4 3789
d187038c 3790static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3791try
3792{
3793 if(newRegex.empty()) {
f26bf547 3794 t_traceRegex.reset();
77499b05
BH
3795 return new string("unset\n");
3796 }
3797 else {
f26bf547 3798 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3799 return new string("ok\n");
3800 }
3801}
3f81d239 3802catch(PDNSException& ae)
77499b05
BH
3803{
3804 return new string(ae.reason+"\n");
3805}
3806
3807string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3808{
3809 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3810}
3811
4e9a20e6 3812static void checkLinuxIPv6Limits()
3813{
3814#ifdef __linux__
3815 string line;
3816 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3817 int lim=std::stoi(line);
4e9a20e6 3818 if(lim < 16384) {
e6a9dde5 3819 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3820 }
3821 }
3822#endif
3823}
36849ff2 3824static void checkOrFixFDS()
4e9a20e6 3825{
c0063e60 3826 unsigned int availFDs=getFilenumLimit();
3827 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3828
3829 if(wantFDs > availFDs) {
067ad20e 3830 unsigned int hardlimit= getFilenumLimit(true);
3831 if(hardlimit >= wantFDs) {
c0063e60 3832 setFilenumLimit(wantFDs);
e6a9dde5 3833 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3834 }
3835 else {
067ad20e 3836 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3837 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3838 g_maxMThreads = newval;
067ad20e 3839 setFilenumLimit(hardlimit);
36849ff2 3840 }
3841 }
4e9a20e6 3842}
77499b05 3843
c390b2da 3844static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3845
f26bf547 3846static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3847{
3848 t_allowFrom = ng;
f26bf547 3849 return nullptr;
49a699c4
BH
3850}
3851
dbd23fc2
BH
/* Command line arguments, re-fed to the argument parser by parseACLs() when
   the ACLs are reloaded. Presumably filled in from main() -- confirm. */
int g_argc{0};
char** g_argv{nullptr};
3854
18af64a8 3855void parseACLs()
f7c1d4e3 3856{
18af64a8 3857 static bool l_initialized;
3ddb9247 3858
49a699c4 3859 if(l_initialized) { // only reload configuration file on second call
18af64a8 3860 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3861 if(::arg()["config-name"]!="") {
3862 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3863 }
18af64a8 3864 cleanSlashes(configname);
3ddb9247
PD
3865
3866 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3867 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3868 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3869 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3870 ::arg().preParse(g_argc, g_argv, "include-dir");
3871
3872 // then process includes
3873 std::vector<std::string> extraConfigs;
242b90e1
AT
3874 ::arg().gatherIncludes(extraConfigs);
3875
1dc8f4d0 3876 for(const std::string& fn : extraConfigs) {
7e818521 3877 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3878 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3879 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3880 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3881 }
ca2c884c
AT
3882
3883 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3884 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3885 }
49a699c4 3886
f26bf547
RG
3887 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3888 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3889
2c95fc65
BH
3890 if(!::arg()["allow-from-file"].empty()) {
3891 string line;
2c95fc65
BH
3892 ifstream ifs(::arg()["allow-from-file"].c_str());
3893 if(!ifs) {
9c61b9d0 3894 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3895 }
3896
3897 string::size_type pos;
3898 while(getline(ifs,line)) {
3899 pos=line.find('#');
3900 if(pos!=string::npos)
3901 line.resize(pos);
3902 trim(line);
3903 if(line.empty())
3904 continue;
3905
18af64a8 3906 allowFrom->addMask(line);
2c95fc65 3907 }
e6a9dde5 3908 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3909 }
3910 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3911 vector<string> ips;
3912 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3913
e6a9dde5 3914 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3915 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3916 allowFrom->addMask(*i);
f7c1d4e3 3917 if(i!=ips.begin())
e6a9dde5
PL
3918 g_log<<Logger::Warning<<", ";
3919 g_log<<Logger::Warning<<*i;
f7c1d4e3 3920 }
e6a9dde5 3921 g_log<<Logger::Warning<<endl;
f7c1d4e3 3922 }
49a699c4 3923 else {
3ddb9247 3924 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3925 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3926 allowFrom = nullptr;
49a699c4 3927 }
3ddb9247 3928
49a699c4 3929 g_initialAllowFrom = allowFrom;
d7dae798 3930 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3931 oldAllowFrom = nullptr;
3ddb9247 3932
49a699c4 3933 l_initialized = true;
18af64a8
BH
3934}
3935
795215f2 3936
756e82cf 3937static void setupDelegationOnly()
3938{
3939 vector<string> parts;
3940 stringtok(parts, ::arg()["delegation-only"], ", \t");
3941 for(const auto& p : parts) {
9065eb05 3942 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3943 }
3944}
795215f2 3945
8fd25133
RG
3946static std::map<unsigned int, std::set<int> > parseCPUMap()
3947{
3948 std::map<unsigned int, std::set<int> > result;
3949
3950 const std::string value = ::arg()["cpu-map"];
3951
3952 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3953 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3954 return result;
3955 }
3956
3957 std::vector<std::string> parts;
3958
3959 stringtok(parts, value, " \t");
3960
3961 for(const auto& part : parts) {
3962 if (part.find('=') == string::npos)
3963 continue;
3964
3965 try {
3966 auto headers = splitField(part, '=');
3967 trim(headers.first);
3968 trim(headers.second);
3969
3970 unsigned int threadId = pdns_stou(headers.first);
3971 std::vector<std::string> cpus;
3972
3973 stringtok(cpus, headers.second, ",");
3974
3975 for(const auto& cpu : cpus) {
3976 int cpuId = std::stoi(cpu);
3977
3978 result[threadId].insert(cpuId);
3979 }
3980 }
3981 catch(const std::exception& e) {
e6a9dde5 3982 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3983 }
3984 }
3985
3986 return result;
3987}
3988
3989static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3990{
3991 const auto& cpuMapping = cpusMap.find(n);
3992 if (cpuMapping != cpusMap.cend()) {
3993 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3994 if (rc == 0) {
e6a9dde5 3995 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3996 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3997 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3998 }
e6a9dde5 3999 g_log<<Logger::Info<<endl;
8fd25133
RG
4000 }
4001 else {
e6a9dde5 4002 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 4003 for (const auto cpu : cpuMapping->second) {
e6a9dde5 4004 g_log<<Logger::Info<<" "<<cpu;
8fd25133 4005 }
e6a9dde5 4006 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
4007 }
4008 }
4009}
4010
af1377b7
NC
4011#ifdef NOD_ENABLED
4012static void setupNODThread()
4013{
4014 if (g_nodEnabled) {
b78727c6
NC
4015 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
4016 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
4017 try {
4018 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
4019 }
4020 catch (const PDNSException& e) {
4021 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
4022 _exit(1);
4023 }
4024 if (!t_nodDBp->init()) {
4025 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
4026 _exit(1);
4027 }
41c542ec 4028 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 4029 t.detach();
ca2526f5 4030 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
4031 }
4032 if (g_udrEnabled) {
b78727c6
NC
4033 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
4034 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
4035 try {
4036 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
4037 }
4038 catch (const PDNSException& e) {
4039 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
4040 _exit(1);
4041 }
4042 if (!t_udrDBp->init()) {
4043 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
4044 _exit(1);
4045 }
4046 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 4047 t.detach();
ca2526f5 4048 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
4049 }
4050}
4051
050e6877 4052static void parseNODWhitelist(const std::string& wlist)
af1377b7
NC
4053{
4054 vector<string> parts;
4055 stringtok(parts, wlist, ",; ");
4056 for(const auto& a : parts) {
4057 g_nodDomainWL.add(DNSName(a));
4058 }
4059}
4060
4061static void setupNODGlobal()
4062{
4063 // Setup NOD subsystem
4064 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
4065 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
4066 g_nodLog = ::arg().mustDo("new-domain-log");
4067 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
4068
4069 // Setup Unique DNS Response subsystem
4070 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
4071 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
4072}
4073#endif /* NOD_ENABLED */
4074
c6042a88 4075static void checkSocketDir(void)
0127f6bd
OM
4076{
4077 struct stat st;
4078 string dir(::arg()["socket-dir"]);
4079 string msg;
c6042a88 4080
0127f6bd
OM
4081 if (stat(dir.c_str(), &st) == -1) {
4082 msg = "it does not exist or cannot access";
4083 }
4084 else if (!S_ISDIR(st.st_mode)) {
4085 msg = "it is not a directory";
4086 }
4087 else if (access(dir.c_str(), R_OK | W_OK | X_OK) != 0) {
4088 msg = "cannot read, write or search";
4089 } else {
4090 return;
4091 }
4092 g_log << Logger::Error << "Problem with socket directory " << dir << ": " << msg << "; see https://docs.powerdns.com/recursor/upgrade.html#x-to-4-3-0-or-master" << endl;
4093 _exit(1);
4094}
4095
d187038c 4096static int serviceMain(int argc, char*argv[])
18af64a8 4097{
e6a9dde5
PL
4098 g_log.setName(s_programname);
4099 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
4100 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
4101
4102 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
4103 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
4104 if(val >= 0)
e6a9dde5 4105 g_log.setFacility(val);
18af64a8 4106 else
e6a9dde5 4107 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
4108 }
4109
ba1a571d 4110 showProductVersion();
3afde9b2 4111
06ea9015 4112 g_disthashseed=dns_random(0xffffffff);
4113
b7ef5828
PL
4114 checkLinuxIPv6Limits();
4115 try {
4116 vector<string> addrs;
4117 if(!::arg()["query-local-address6"].empty()) {
4118 SyncRes::s_doIPv6=true;
e6a9dde5 4119 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
4120
4121 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
4122 for(const string& addr : addrs) {
4123 g_localQueryAddresses6.push_back(ComboAddress(addr));
4124 }
4125 }
4126 else {
e6a9dde5 4127 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
4128 }
4129 addrs.clear();
4130 stringtok(addrs, ::arg()["query-local-address"], ", ;");
4131 for(const string& addr : addrs) {
4132 g_localQueryAddresses4.push_back(ComboAddress(addr));
4133 }
4134 }
4135 catch(std::exception& e) {
e6a9dde5 4136 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
4137 exit(99);
4138 }
4139
e48c6b8a
PL
4140 // keep this ABOVE loadRecursorLuaConfig!
4141 if(::arg()["dnssec"]=="off")
4142 g_dnssecmode=DNSSECMode::Off;
4143 else if(::arg()["dnssec"]=="process-no-validate")
4144 g_dnssecmode=DNSSECMode::ProcessNoValidate;
4145 else if(::arg()["dnssec"]=="process")
4146 g_dnssecmode=DNSSECMode::Process;
4147 else if(::arg()["dnssec"]=="validate")
4148 g_dnssecmode=DNSSECMode::ValidateAll;
4149 else if(::arg()["dnssec"]=="log-fail")
4150 g_dnssecmode=DNSSECMode::ValidateForLog;
4151 else {
e6a9dde5 4152 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
4153 exit(1);
4154 }
4155
9a3ab3e4
KM
4156 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
4157 if (g_signatureInceptionSkew < 0) {
4158 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
4159 exit(1);
4160 }
4161
e48c6b8a 4162 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 4163 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 4164
a6f7f5fe 4165 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
4166 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
4167
4168 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 4169 try {
e6ec15bf 4170 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
4171 }
4172 catch (PDNSException &e) {
e6a9dde5 4173 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
4174 exit(1);
4175 }
ad42489c 4176
18af64a8 4177 parseACLs();
d6f3fcfa 4178 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 4179
eb5bae86 4180 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
4181 vector<string> ips;
4182 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
4183 ips.push_back("0.0.0.0");
4184 ips.push_back("::");
c36bc97a 4185
e6a9dde5 4186 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 4187 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 4188 SyncRes::addDontQuery(*i);
eb5bae86 4189 if(i!=ips.begin())
e6a9dde5
PL
4190 g_log<<Logger::Warning<<", ";
4191 g_log<<Logger::Warning<<*i;
eb5bae86 4192 }
e6a9dde5 4193 g_log<<Logger::Warning<<endl;
eb5bae86
BH
4194 }
4195
f7c1d4e3 4196 g_quiet=::arg().mustDo("quiet");
3ddb9247 4197
b243ca3b 4198 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
4199 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
4200 if(g_weDistributeQueries) {
b243ca3b 4201 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 4202 }
3ddb9247 4203
756e82cf 4204 setupDelegationOnly();
b33c2462 4205 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 4206
77499b05
BH
4207 if(::arg()["trace"]=="fail") {
4208 SyncRes::setDefaultLogMode(SyncRes::Store);
4209 }
4210 else if(::arg().mustDo("trace")) {
4211 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
4212 ::arg().set("quiet")="no";
4213 g_quiet=false;
3e9c6c0a 4214 g_dnssecLOG=true;
f7c1d4e3 4215 }
43a9b290
PL
4216 string myHostname = getHostname();
4217 if (myHostname == "UNKNOWN"){
4218 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
4219 myHostname = "";
d0983bff 4220 }
3ddb9247 4221
aadceba8 4222 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
5cf4b2e7 4223 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
aadceba8 4224
1051f8a9
BH
4225 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
4226
f7c1d4e3 4227 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
b9473937 4228 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
63637fd8 4229 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 4230 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
4231 // Cap the packetcache-servfail-ttl to the packetcache-ttl
4232 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
4233 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
4234 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
4235 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 4236 SyncRes::s_serverID=::arg()["server-id"];
173d790e 4237 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 4238 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 4239 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 4240 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 4241 if(SyncRes::s_serverID.empty()) {
d0983bff 4242 SyncRes::s_serverID = myHostname;
f7c1d4e3 4243 }
3ddb9247 4244
e9f9b8ec
RG
4245 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
4246 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
c9783016 4247 SyncRes::clearECSStats();
fd8898fb 4248 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
4249 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
ed9019c9 4250 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
e9f9b8ec 4251
116d1288 4252 SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
d40a915b 4253
409b8398
OM
4254 if (SyncRes::s_qnameminimization) {
4255 // With an empty cache, a rev ipv6 query with dnssec enabled takes
4256 // almost 100 queries. Default maxqperq is 60.
13c43bdd 4257 SyncRes::s_maxqperq = std::max(SyncRes::s_maxqperq, static_cast<unsigned int>(100));
409b8398
OM
4258 }
4259
d40a915b
OM
4260 SyncRes::s_hardenNXD = SyncRes::HardenNXD::DNSSEC;
4261 string value = ::arg()["nothing-below-nxdomain"];
4262 if (value == "yes") {
4263 SyncRes::s_hardenNXD = SyncRes::HardenNXD::Yes;
4264 } else if (value == "no") {
4265 SyncRes::s_hardenNXD = SyncRes::HardenNXD::No;
4266 } else if (value != "dnssec") {
4267 g_log << Logger::Error << "Unknown nothing-below-nxdomain mode: " << value << endl;
4268 exit(1);
4269 }
116d1288 4270
8a3a3822
RG
4271 if (!::arg().isEmpty("ecs-scope-zero-address")) {
4272 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
4273 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
4274 }
4275 else {
4276 bool found = false;
4277 for (const auto& addr : g_localQueryAddresses4) {
4278 if (!IsAnyAddress(addr)) {
4279 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
4280 found = true;
4281 break;
4282 }
4283 }
4284 if (!found) {
4285 for (const auto& addr : g_localQueryAddresses6) {
4286 if (!IsAnyAddress(addr)) {
4287 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
4288 found = true;
4289 break;
4290 }
4291 }
4292 if (!found) {
4293 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
4294 }
4295 }
4296 }
4297
2fe3354d
CH
4298 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
4299 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
4300 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
4301
5cc8371b 4302 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 4303 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 4304
38d8b937 4305 g_proxyProtocolACL.toMasks(::arg()["proxy-protocol-from"]);
5216ddcc
RG
4306 g_proxyProtocolMaximumSize = ::arg().asNum("proxy-protocol-maximum-size");
4307
ef3ee606 4308 if (!::arg()["dns64-prefix"].empty()) {
75e31a0b
RG
4309 try {
4310 auto dns64Prefix = Netmask(::arg()["dns64-prefix"]);
4311 if (dns64Prefix.getBits() != 96) {
e9c3c56f 4312 g_log << Logger::Error << "Invalid prefix for 'dns64-prefix', the current implementation only supports /96 prefixes: " << ::arg()["dns64-prefix"] << endl;
75e31a0b
RG
4313 exit(1);
4314 }
4315 g_dns64Prefix = dns64Prefix.getNetwork();
4316 g_dns64PrefixReverse = reverseNameFromIP(*g_dns64Prefix);
4317 /* /96 is 24 nibbles + 2 for "ip6.arpa." */
4318 while (g_dns64PrefixReverse.countLabels() > 26) {
4319 g_dns64PrefixReverse.chopOff();
4320 }
ef3ee606 4321 }
75e31a0b
RG
4322 catch (const NetmaskException& ne) {
4323 g_log << Logger::Error << "Invalid prefix '" << ::arg()["dns64-prefix"] << "' for 'dns64-prefix': " << ne.reason << endl;
4324 exit(1);
ef3ee606
RG
4325 }
4326 }
4327
5b0ddd18 4328 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 4329
49a699c4 4330 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 4331
08f3f638 4332 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 4333
f7c1d4e3 4334 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 4335 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
4336
4337 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
4338 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
4339
b3adda56
PD
4340 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
4341
b243ca3b 4342 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 4343 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 4344 if (g_numWorkerThreads < 1) {
e6a9dde5 4345 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
4346 g_numWorkerThreads = 1;
4347 }
4348
b243ca3b 4349 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
4350 g_maxMThreads = ::arg().asNum("max-mthreads");
4351
c51c551e
OM
4352
4353 int64_t maxInFlight = ::arg().asNum("max-concurrent-requests-per-tcp-connection");
4354 if (maxInFlight < 1 || maxInFlight > USHRT_MAX || maxInFlight >= g_maxMThreads) {
4355 g_log<<Logger::Warning<<"Asked to run with illegal max-concurrent-requests-per-tcp-connection, setting to default (10)"<<endl;
4356 TCPConnection::s_maxInFlight = 10;
4357 } else {
4358 TCPConnection::s_maxInFlight = maxInFlight;
4359 }
4360
4361
00b8cadc
RG
4362 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
4363
0ec489bf 4364 g_statisticsInterval = ::arg().asNum("statistics-interval");
4365
559b6c93
PL
4366 {
4367 SuffixMatchNode dontThrottleNames;
4368 vector<string> parts;
52858314 4369 stringtok(parts, ::arg()["dont-throttle-names"], " ,");
559b6c93
PL
4370 for (const auto &p : parts) {
4371 dontThrottleNames.add(DNSName(p));
4372 }
d514bd03 4373 g_dontThrottleNames.setState(std::move(dontThrottleNames));
559b6c93
PL
4374
4375 NetmaskGroup dontThrottleNetmasks;
52858314 4376 stringtok(parts, ::arg()["dont-throttle-netmasks"], " ,");
559b6c93
PL
4377 for (const auto &p : parts) {
4378 dontThrottleNetmasks.addMask(Netmask(p));
4379 }
d514bd03 4380 g_dontThrottleNetmasks.setState(std::move(dontThrottleNetmasks));
559b6c93
PL
4381 }
4382
144040be 4383 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
078be17f
RG
4384 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
4385 s_balancingFactor = 0.0;
4386 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
4387 }
144040be 4388
810ff705
RG
4389#ifdef SO_REUSEPORT
4390 g_reusePort = ::arg().mustDo("reuseport");
4391#endif
4392
b243ca3b 4393 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 4394
b243ca3b
RG
4395 if (g_reusePort) {
4396 if (g_weDistributeQueries) {
4397 /* first thread is the handler, then distributors */
4398 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4399 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 4400 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 4401 makeUDPServerSockets(deferredAdds);
adb6cd72 4402 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
4403 }
4404 }
4405 else {
4406 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4407 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4408 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 4409 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 4410 makeUDPServerSockets(deferredAdds);
adb6cd72 4411 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 4412 }
810ff705
RG
4413 }
4414 }
4415 else {
c47f201b 4416 std::set<int> tcpSockets;
b243ca3b
RG
4417 /* we don't have reuseport so we can only open one socket per
4418 listening addr:port and everyone will listen on it */
4419 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
4420 makeTCPServerSockets(g_deferredAdds, tcpSockets);
4421
4422 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
4423 needs to listen to the shared sockets */
4424 if (g_weDistributeQueries) {
4425 /* first thread is the handler, then distributors */
4426 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4427 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4428 }
4429 }
4430 else {
4431 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4432 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4433 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4434 }
4435 }
810ff705 4436 }
815099b2 4437
af1377b7
NC
4438#ifdef NOD_ENABLED
4439 // Setup newly observed domain globals
4440 setupNODGlobal();
4441#endif /* NOD_ENABLED */
4442
677e2a46
BH
4443 int forks;
4444 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
4445 if(!fork()) // we are child
4446 break;
4447 }
3ddb9247 4448
f7c1d4e3 4449 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
4450 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
4451 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
4452 daemonize();
4453 }
cd180a71 4454 if(Utility::getpid() == 1) {
807db6c8
FL
4455 /* We are running as pid 1, register sigterm and sigint handler
4456
4457 The Linux kernel will handle SIGTERM and SIGINT for all processes, except PID 1.
4458 It assumes that the processes running as pid 1 is an "init" like system.
4459 For years, this was a safe assumption, but containers change that: in
4460 most (all?) container implementations, the application itself is running
4461 as pid 1. This means that sending signals to those applications, will not
ef2ea4bf 4462 be handled by default. Results might be "your container not responding
dda61e20
FL
4463 when asking it to stop", or "ctrl-c not working even when the app is
4464 running in the foreground inside a container".
807db6c8
FL
4465
4466 So TL;DR: If we're running pid 1 (container), we should handle SIGTERM and SIGINT ourselves */
4467
cd180a71
FL
4468 signal(SIGTERM,termIntHandler);
4469 signal(SIGINT,termIntHandler);
4470 }
4471
f7c1d4e3
BH
4472 signal(SIGUSR1,usr1Handler);
4473 signal(SIGUSR2,usr2Handler);
4474 signal(SIGPIPE,SIG_IGN);
810ff705 4475
a6414fdc 4476 checkOrFixFDS();
3ddb9247 4477
d1b28475
KM
4478#ifdef HAVE_LIBSODIUM
4479 if (sodium_init() == -1) {
e6a9dde5 4480 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
4481 exit(99);
4482 }
4483#endif
4484
3afde9b2
PL
4485 openssl_thread_setup();
4486 openssl_seed();
e97cb679
AT
4487 /* setup rng before chroot */
4488 dns_random_init();
3afde9b2 4489
bdbb07e0 4490 if(::arg()["server-id"].empty()) {
d0983bff 4491 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
4492 }
4493
138435cb
BH
4494 int newgid=0;
4495 if(!::arg()["setgid"].empty())
2211dac9 4496 newgid = strToGID(::arg()["setgid"]);
138435cb
BH
4497 int newuid=0;
4498 if(!::arg()["setuid"].empty())
2211dac9 4499 newuid = strToUID(::arg()["setuid"]);
138435cb 4500
f1d6a7ce
KM
4501 Utility::dropGroupPrivs(newuid, newgid);
4502
138435cb 4503 if (!::arg()["chroot"].empty()) {
75336810
PL
4504#ifdef HAVE_SYSTEMD
4505 char *ns;
4506 ns = getenv("NOTIFY_SOCKET");
4507 if (ns != nullptr) {
e6a9dde5 4508 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
4509 exit(1);
4510 }
4511#endif
138435cb 4512 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
a2a81d42
OM
4513 int err = errno;
4514 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (err)<<", exiting"<<endl;
4515 exit(1);
138435cb 4516 }
f0f3f0b0 4517 else
377602e3 4518 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
4519 }
4520
c6042a88
OM
4521 checkSocketDir();
4522
f0f3f0b0
PL
4523 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4524 if(!s_pidfname.empty())
4525 unlink(s_pidfname.c_str()); // remove possible old pid file
4526 writePid();
4527
4528 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4529
f1d6a7ce 4530 Utility::dropUserPrivs(newuid);
1f2b341e
RG
4531 try {
4532 /* we might still have capabilities remaining, for example if we have been started as root
4533 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4534 like CAP_NET_BIND_SERVICE.
4535 */
4536 dropCapabilities();
4537 }
4538 catch(const std::exception& e) {
4539 g_log<<Logger::Warning<<e.what()<<endl;
4540 }
c0063e60 4541
e6ec15bf
RG
4542 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4543
49a699c4 4544 makeThreadPipes();
3ddb9247 4545
5d4dd7fe
BH
4546 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4547 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 4548 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 4549 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 4550
c29d820c
RG
4551 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4552
563517f3
RG
4553 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4554 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4555 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4556 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
72259676 4557
d705aad9
RG
4558 if (::arg().mustDo("snmp-agent")) {
4559 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4560 g_snmpAgent->run();
4561 }
4562
b47026fd 4563 int port = ::arg().asNum("udp-source-port-min");
58da9034 4564 if(port < 1024 || port > 65535){
e6a9dde5 4565 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
4566 exit(99); // this isn't going to fix itself either
4567 }
4568 s_minUdpSourcePort = port;
b47026fd 4569 port = ::arg().asNum("udp-source-port-max");
58da9034 4570 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 4571 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
4572 exit(99); // this isn't going to fix itself either
4573 }
4574 s_maxUdpSourcePort = port;
4575 std::vector<string> parts {};
b47026fd 4576 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
4577 for (const auto &part : parts)
4578 {
4579 port = std::stoi(part);
58da9034 4580 if(port < 1024 || port > 65535){
e6a9dde5 4581 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
4582 exit(99); // this isn't going to fix itself either
4583 }
4584 s_avoidUdpSourcePorts.insert(port);
4585 }
4586
b243ca3b 4587 unsigned int currentThreadId = 1;
8fd25133 4588 const auto cpusMap = parseCPUMap();
d77abca1 4589
c3828c03 4590 if(g_numThreads == 1) {
e6a9dde5 4591 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
4592#ifdef HAVE_SYSTEMD
4593 sd_notify(0, "READY=1");
4594#endif
b243ca3b
RG
4595
4596 /* This thread handles the web server, carbon, statistics and the control channel */
4597 auto& handlerInfos = s_threadInfos.at(0);
4598 handlerInfos.isHandler = true;
c390b2da 4599 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
4600
4601 setCPUMap(cpusMap, currentThreadId, pthread_self());
4602
4603 auto& infos = s_threadInfos.at(currentThreadId);
4604 infos.isListener = true;
4605 infos.isWorker = true;
c390b2da 4606 recursorThread(currentThreadId++, "worker");
8657c2af
OM
4607
4608 handlerInfos.thread.join();
76698c6e
BH
4609 }
4610 else {
8fd25133 4611
ef31b090
OM
4612
4613 if (g_weDistributeQueries) {
4614 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4615 auto& infos = s_threadInfos.at(currentThreadId + n);
4616 infos.isListener = true;
4617 }
4618 }
4619 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4620 auto& infos = s_threadInfos.at(currentThreadId + (g_weDistributeQueries ? g_numDistributorThreads : 0) + n);
4621 infos.isListener = !g_weDistributeQueries;
4622 infos.isWorker = true;
4623 }
4624
b243ca3b
RG
4625 if (g_weDistributeQueries) {
4626 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4627 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4628 auto& infos = s_threadInfos.at(currentThreadId);
c390b2da 4629 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
4630 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4631 }
4632 }
8fd25133 4633
62b549e0
RG
4634 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4635
b243ca3b
RG
4636 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4637 auto& infos = s_threadInfos.at(currentThreadId);
c390b2da 4638 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b 4639 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 4640 }
b243ca3b 4641
6b6720de
PL
4642#ifdef HAVE_SYSTEMD
4643 sd_notify(0, "READY=1");
4644#endif
b243ca3b
RG
4645
4646 /* This thread handles the web server, carbon, statistics and the control channel */
4647 auto& infos = s_threadInfos.at(0);
4648 infos.isHandler = true;
c390b2da 4649 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b 4650
8657c2af
OM
4651 for (auto & ti : s_threadInfos) {
4652 ti.thread.join();
4653 }
bb4bdbaf 4654 }
da966ae0
OM
4655
4656#ifdef HAVE_PROTOBUF
4657 google::protobuf::ShutdownProtobufLibrary();
4658#endif /* HAVE_PROTOBUF */
bb4bdbaf
BH
4659 return 0;
4660}
4661
c390b2da 4662static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
4663try
4664{
d77abca1 4665 t_id=n;
b243ca3b 4666 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
4667
4668 static string threadPrefix = "pdns-r/";
519f5484 4669 setThreadName(threadPrefix + threadName);
c390b2da 4670
49a699c4 4671 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 4672 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 4673 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
4674 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4675 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 4676 primeHints();
3ddb9247 4677
f26bf547 4678 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 4679
e6a9dde5 4680 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 4681
af1377b7 4682#ifdef NOD_ENABLED
41c542ec
NC
4683 if (threadInfo.isWorker)
4684 setupNODThread();
af1377b7 4685#endif /* NOD_ENABLED */
c1751a59
RG
4686
4687 /* the listener threads handle TCP queries */
4688 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
4689 try {
4690 if(!::arg()["lua-dns-script"].empty()) {
4691 t_pdl = std::make_shared<RecursorLua4>();
4692 t_pdl->loadFile(::arg()["lua-dns-script"]);
4693 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4694 }
4695 }
4696 catch(std::exception &e) {
4697 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4698 _exit(99);
674cf0f6 4699 }
674cf0f6 4700 }
3ddb9247 4701
f8f243b0 4702 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 4703 if(ringsize) {
f26bf547 4704 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
4705 if(g_weDistributeQueries)
4706 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 4707 else
3ddb9247 4708 t_remotes->set_capacity(ringsize);
f26bf547 4709 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4710 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
4711 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4712 t_bogusremotes->set_capacity(ringsize);
f26bf547 4713 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4714 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 4715 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4716 t_timeouts->set_capacity(ringsize);
92011b8f 4717
f26bf547 4718 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4719 t_queryring->set_capacity(ringsize);
f26bf547 4720 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4721 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
4722 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4723 t_bogusqueryring->set_capacity(ringsize);
92011b8f 4724 }
3ddb9247 4725
f26bf547 4726 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
144040be 4727 threadInfo.mt = MT.get();
3ddb9247 4728
63341e8d
RG
4729#ifdef HAVE_PROTOBUF
4730 /* start protobuf export threads if needed */
4731 auto luaconfsLocal = g_luaconfs.getLocal();
4732 checkProtobufExport(luaconfsLocal);
4733 checkOutgoingProtobufExport(luaconfsLocal);
4734#endif /* HAVE_PROTOBUF */
b9fa43e0
OM
4735#ifdef HAVE_FSTRM
4736 checkFrameStreamExport(luaconfsLocal);
4737#endif
63341e8d 4738
bb4bdbaf
BH
4739 PacketID pident;
4740
4741 t_fdm=getMultiplexer();
d77abca1 4742
da966ae0
OM
4743 RecursorWebServer *rws = nullptr;
4744
b243ca3b 4745 if(threadInfo.isHandler) {
d07bf7ff 4746 if(::arg().mustDo("webserver")) {
e6a9dde5 4747 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 4748 try {
da966ae0 4749 rws = new RecursorWebServer(t_fdm);
8989097d
CH
4750 }
4751 catch(PDNSException &e) {
e6a9dde5 4752 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
4753 exit(99);
4754 }
f3d1d67b 4755 }
377602e3 4756 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 4757 }
810ff705 4758 else {
d77abca1 4759
b243ca3b
RG
4760 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4761 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4762
4763 if (threadInfo.isListener) {
4764 if (g_reusePort) {
4765 /* then every listener has its own FDs */
7af99dff 4766 for(const auto& deferred : threadInfo.deferredAdds) {
b243ca3b
RG
4767 t_fdm->addReadFD(deferred.first, deferred.second);
4768 }
810ff705 4769 }
b243ca3b
RG
4770 else {
4771 /* otherwise all listeners are listening on the same ones */
7af99dff 4772 for(const auto& deferred : g_deferredAdds) {
b243ca3b 4773 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4774 }
4775 }
4776 }
810ff705 4777 }
3ddb9247 4778
b0b37121 4779 registerAllStats();
d77abca1 4780
b243ca3b 4781 if(threadInfo.isHandler) {
674cf0f6
BH
4782 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4783 }
1bc3c142 4784
f7c1d4e3 4785 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4786
f7c1d4e3 4787 bool listenOnTCP(true);
49a699c4 4788
cb1523d1 4789 time_t last_stat = 0;
a2f87dd1 4790 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4791 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4792 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 4793 counter.store(0); // used to periodically execute certain tasks
8657c2af
OM
4794
4795 while (!RecursorControlChannel::stop) {
ac0e821b 4796 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4797
3427fa8a
BH
4798 if(!(counter%500)) {
4799 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4800 }
4801
d2392145 4802 if(!(counter%55)) {
d8f6d49f 4803 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4804 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4805
f7c1d4e3 4806 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 4807 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4808 if(g_logCommonErrors)
e6a9dde5 4809 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4810 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4811 }
4812 }
3ddb9247 4813
f7c1d4e3
BH
4814 counter++;
4815
b243ca3b 4816 if(threadInfo.isHandler) {
cb1523d1
RG
4817 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4818 doStats();
4819 last_stat = g_now.tv_sec;
4820 }
f7c1d4e3 4821
cb1523d1 4822 Utility::gettimeofday(&g_now, 0);
2c78bd57 4823
cb1523d1
RG
4824 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4825 MT->makeThread(doCarbonDump, 0);
4826 last_carbon = g_now.tv_sec;
4827 }
2c78bd57 4828 }
2a0276a9 4829 if (t_pdl != nullptr) {
9adbe790 4830 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
4831 /* remember that the listener threads handle TCP queries */
4832 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
4833 // Only on threads processing queries
4834 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4835 t_pdl->maintenance();
4836 last_lua_maintenance = g_now.tv_sec;
4837 }
9adbe790 4838 }
a2f87dd1 4839 }
2c78bd57 4840
bb4bdbaf 4841 t_fdm->run(&g_now);
3ea54bf0 4842 // 'run' updates g_now for us
f7c1d4e3 4843
b243ca3b 4844 if(threadInfo.isListener) {
5c889cf5 4845 if(listenOnTCP) {
c47f201b
RG
4846 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4847 for(const auto fd : threadInfo.tcpSockets) {
4848 t_fdm->removeReadFD(fd);
b243ca3b 4849 }
c47f201b
RG
4850 listenOnTCP=false;
4851 }
f7c1d4e3 4852 }
5c889cf5 4853 else {
c47f201b
RG
4854 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4855 for(const auto fd : threadInfo.tcpSockets) {
4856 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4857 }
c47f201b
RG
4858 listenOnTCP=true;
4859 }
f7c1d4e3
BH
4860 }
4861 }
4862 }
da966ae0
OM
4863 delete rws;
4864 delete t_fdm;
8657c2af 4865 return 0;
f7c1d4e3 4866}
3f81d239 4867catch(PDNSException &ae) {
e6a9dde5 4868 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4869 return 0;
4870}
4871catch(std::exception &e) {
e6a9dde5 4872 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4873 return 0;
4874}
4875catch(...) {
e6a9dde5 4876 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4877 return 0;
4878}
4879
51e2144e 4880
3ddb9247 4881int main(int argc, char **argv)
288f4aa9 4882{
dbd23fc2
BH
4883 g_argc = argc;
4884 g_argv = argv;
5e3de507 4885 g_stats.startupTime=time(0);
b51ef4f9 4886 Utility::srandom();
3e135495 4887 versionSetProduct(ProductRecursor);
8a63d3ce 4888 reportBasicTypes();
0007c2e5 4889 reportOtherTypes();
ea634573 4890
22030c37 4891 int ret = EXIT_SUCCESS;
caa6eefa 4892
288f4aa9 4893 try {
f888311c 4894 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4895 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4896 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4897 ::arg().set("local-port","port to listen on")="53";
32252594 4898 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4899 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4900 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4901 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4902 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4903 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4904 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4905 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4906 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4907 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4908 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4909 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19 4910 ::arg().set("chroot","switch to chroot jail")="";
fe9e7b8d
PL
4911 ::arg().set("setgid","If set, change group id to this gid for more security"
4912#ifdef HAVE_SYSTEMD
4913#define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
4914 SYSTEMD_SETID_MSG
4915#endif
4916 )="";
4917 ::arg().set("setuid","If set, change user id to this uid for more security"
4918#ifdef HAVE_SYSTEMD
4919 SYSTEMD_SETID_MSG
4920#endif
4921 )="";
c83ee49d 4922 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4923 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4924 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4925 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4926 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4927 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4928 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4929 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4930 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4931 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4932 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4933 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
8ca656a8 4934 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
cc08b5a9 4935 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4936 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4937 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4938 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4939 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4940
0ec489bf 4941 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4942 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4943 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4944 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4945 ::arg().set("socket-owner","Owner of socket")="";
4946 ::arg().set("socket-group","Group of socket")="";
4947 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4948
9a5b0a54
PL
4949 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+"/pdns-recursor when unset and not chrooted"
4950#ifdef HAVE_SYSTEMD
4951 + ". Set to the RUNTIME_DIRECTORY environment variable when that variable has a value (e.g. under systemd).")="";
4952 auto runtimeDir = getenv("RUNTIME_DIRECTORY");
4953 if (runtimeDir != nullptr) {
4954 ::arg().set("socket-dir") = runtimeDir;
4955 }
4956#else
4957 )="";
4958#endif
2e3d8a19
BH
4959 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4960 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4961 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4962 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4963 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4964 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
87ff2287 4965 ::arg().set("max-concurrent-requests-per-tcp-connection", "Maximum number of requests handled concurrently per TCP connection") = "10";
324dc148 4966 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4967 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
559b6c93
PL
4968 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4969 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
2e3d8a19 4970 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4971 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4972 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
b9473937 4973 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
c3e753c7 4974 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4975 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4976 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4977 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4978 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4979 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4980 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4981 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4982 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4983 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4984 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4985 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4986 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4987 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4988 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4989 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4990 ::arg().set("lua-config-file", "More powerful configuration options")="";
0273d4ab 4991 ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
644dd1da 4992
5605c067 4993 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4994 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4995 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4996 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4997 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4998 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4999 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 5000 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 5001 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 5002 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 5003 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 5004 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
fd8898fb 5005 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
35695d18 5006 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
fd8898fb 5007 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5cf4b2e7 5008 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
ed9019c9 5009 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
3f975863 5010 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 5011 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 5012 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 5013 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 5014 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 5015 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 5016 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 5017 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 5018 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
5019 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
5020 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 5021 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
409b8398 5022 ::arg().set("max-qperq", "Maximum outgoing queries per query")="60";
c5950146 5023 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 5024 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 5025 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
c29d820c 5026 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
ee271fc4 5027 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
a09a8ce0 5028
68e6df3c 5029 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 5030 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 5031
5032 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 5033
d705aad9 5034 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 5035 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 5036
72259676
RG
5037 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
5038 for (size_t idx = 0; idx < 32; idx++) {
5039 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
5040 }
5041 for (size_t idx = 0; idx < 128; idx++) {
5042 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
5043 }
563517f3
RG
5044 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
5045 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
5046 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
5047 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
d705aad9 5048
0735b17e 5049 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 5050 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 5051
8fd25133
RG
5052 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
5053
98d36505
RG
5054 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
5055
5cc8371b 5056 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 5057 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 5058
5216ddcc
RG
5059 ::arg().set("proxy-protocol-from", "A Proxy Protocol header is only allowed from these subnets")="";
5060 ::arg().set("proxy-protocol-maximum-size", "The maximum size of a proxy protocol payload, including the TLV values")="512";
5061
ef3ee606
RG
5062 ::arg().set("dns64-prefix", "DNS64 prefix")="";
5063
58da9034 5064 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
5065 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
5066 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 5067 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 5068 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
144040be 5069 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
8949a3e0
OM
5070
5071 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="yes";
d40a915b 5072 ::arg().setSwitch("nothing-below-nxdomain", "When an NXDOMAIN exists in cache for a name with fewer labels than the qname, send NXDOMAIN without doing a lookup (see RFC 8020)")="dnssec";
ba3d53d1 5073 ::arg().set("max-generate-steps", "Maximum number of $GENERATE steps when loading a zone from a file")="0";
359d6c17 5074 ::arg().set("cache-shards", "Number of shards in the record cache")="1024";
d40a915b 5075
af1377b7
NC
5076#ifdef NOD_ENABLED
5077 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
5078 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
5079 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
5080 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
5081 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 5082 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 5083 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
5084 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
5085 ::arg().set("unique-response-log", "Log unique responses")="yes";
5086 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 5087 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 5088 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 5089#endif /* NOD_ENABLED */
2e3d8a19 5090 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 5091 ::arg().setCmd("version","Print version string");
d5141417 5092 ::arg().setCmd("config","Output blank configuration");
8864bdf6 5093 ::arg().setDefaults();
e6a9dde5 5094 g_log.toConsole(Logger::Info);
2e3d8a19 5095 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 5096
2d733c0f
CH
5097 string configname=::arg()["config-dir"]+"/recursor.conf";
5098 if(::arg()["config-name"]!="") {
5099 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 5100 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
5101 }
5102 cleanSlashes(configname);
5124de27 5103
5cc1ea1d 5104 if(!::arg().getCommands().empty()) {
94ea3c7b
PL
5105 cerr<<"Fatal: non-option";
5106 if (::arg().getCommands().size() > 1) {
5107 cerr<<"s";
5108 }
5109 cerr<<" (";
5110 bool first = true;
7af99dff 5111 for (const auto& c : ::arg().getCommands()) {
94ea3c7b
PL
5112 if (!first) {
5113 cerr<<", ";
5114 }
5115 first = false;
5116 cerr<<c;
5117 }
5118 cerr<<") on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
5cc1ea1d
CH
5119 exit(99);
5120 }
5121
577cf284 5122 if(::arg().mustDo("config")) {
8864bdf6 5123 cout<<::arg().configstring(false, true);
577cf284
BH
5124 exit(0);
5125 }
5126
3ddb9247 5127 if(!::arg().file(configname.c_str()))
e6a9dde5 5128 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 5129
2e3d8a19 5130 ::arg().parse(argc,argv);
c836dc19 5131
2054afbb
CH
5132 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
5133 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
5134 exit(EXIT_FAILURE);
5135 }
5136
5137 if (::arg()["socket-dir"].empty()) {
5138 if (::arg()["chroot"].empty())
0524add9 5139 ::arg().set("socket-dir") = std::string(LOCALSTATEDIR) + "/pdns-recursor";
f0f3f0b0
PL
5140 else
5141 ::arg().set("socket-dir") = "/";
5142 }
5143
2e3d8a19 5144 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 5145
b243ca3b
RG
5146 if(::arg().asNum("threads")==1) {
5147 if (::arg().mustDo("pdns-distributes-queries")) {
5148 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
5149 ::arg().set("pdns-distributes-queries")="no";
5150 }
5151 }
5152
5153 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
5154 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
5155 ::arg().set("distributor-threads")="1";
5156 }
5157
5158 if (!::arg().mustDo("pdns-distributes-queries")) {
5159 ::arg().set("distributor-threads")="0";
5160 }
61d74169 5161
2e3d8a19 5162 if(::arg().mustDo("help")) {
ff5ba4f9
WA
5163 cout<<"syntax:"<<endl<<endl;
5164 cout<<::arg().helpstring(::arg()["help"])<<endl;
5165 exit(0);
b636533b 5166 }
5e3de507 5167 if(::arg().mustDo("version")) {
ba1a571d 5168 showProductVersion();
3613a51c 5169 showBuildConfiguration();
67076869 5170 exit(0);
5e3de507 5171 }
b636533b 5172
359d6c17
OM
5173 s_RC = std::unique_ptr<MemRecursorCache>(new MemRecursorCache(::arg().asNum("cache-shards")));
5174
34162f8f 5175 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 5176
34162f8f
CH
5177 if (logUrgency < Logger::Error)
5178 logUrgency = Logger::Error;
f48d7b65 5179 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
5180 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
5181 }
e6a9dde5
PL
5182 g_log.setLoglevel(logUrgency);
5183 g_log.toConsole(logUrgency);
34162f8f 5184
f7c1d4e3 5185 serviceMain(argc, argv);
288f4aa9 5186 }
3f81d239 5187 catch(PDNSException &ae) {
e6a9dde5 5188 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 5189 ret=EXIT_FAILURE;
288f4aa9 5190 }
fdbf35ac 5191 catch(std::exception &e) {
e6a9dde5 5192 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 5193 ret=EXIT_FAILURE;
288f4aa9
BH
5194 }
5195 catch(...) {
e6a9dde5 5196 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 5197 ret=EXIT_FAILURE;
288f4aa9 5198 }
3ddb9247 5199
22030c37 5200 return ret;
288f4aa9 5201}