]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
rec: Only log qname parsing errors when 'log-common-errors' is set
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640
BH
60#include "sstuff.hh"
61#include <boost/tuple/tuple.hpp>
62#include <boost/tuple/tuple_comparison.hpp>
72df400f 63#include <boost/shared_array.hpp>
7f1fa77d 64#include <boost/function.hpp>
5605c067 65#include <boost/algorithm/string.hpp>
8f7473d7 66#ifdef MALLOC_TRACE
67#include "malloctrace.hh"
68#endif
40a3dd64 69#include <netinet/tcp.h>
f12666f2 70#include "capabilities.hh"
ea634573
BH
71#include "dnsparser.hh"
72#include "dnswriter.hh"
73#include "dnsrecords.hh"
f814d7c8 74#include "zoneparser-tng.hh"
1d5b3ce6 75#include "rec_channel.hh"
aaacf7f2 76#include "logger.hh"
c8ddb7c2 77#include "iputils.hh"
09e6702a 78#include "mplexer.hh"
c038218b 79#include "config.h"
808c5ef7 80#include "lua-recursor4.hh"
ba1a571d 81#include "version.hh"
79332bff 82#include "responsestats.hh"
d67620e4 83#include "secpoll-recursor.hh"
c5c066bf 84#include "dnsname.hh"
644dd1da 85#include "filterpo.hh"
86#include "rpzloader.hh"
b3f0ed10 87#include "validate-recursor.hh"
f3c18728 88#include "rec-lua-conf.hh"
5c3b5e7f 89#include "ednsoptions.hh"
85c7ca75 90#include "gettime.hh"
d6f3fcfa 91#include "pubsuffix.hh"
af1377b7
NC
92#ifdef NOD_ENABLED
93#include "nod.hh"
94#endif /* NOD_ENABLED */
f3c18728 95
d9d3f9c1 96#include "rec-protobuf.hh"
d705aad9 97#include "rec-snmp.hh"
aa7929a3 98
6b6720de
PL
99#ifdef HAVE_SYSTEMD
100#include <systemd/sd-daemon.h>
101#endif
102
d187038c
RG
103#include "namespaces.hh"
104
d61aa945
RG
105#ifdef HAVE_PROTOBUF
106#include "uuid-utils.hh"
107#endif
108
5cc8371b
RG
109#include "xpf.hh"
110
d187038c
RG
111typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
112
f26bf547 113static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 114static thread_local unsigned int t_id = 0;
f26bf547
RG
115static thread_local std::shared_ptr<Regex> t_traceRegex;
116static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 117#ifdef HAVE_PROTOBUF
3fe06137 118static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 119static thread_local uint64_t t_protobufServersGeneration;
3fe06137 120static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 121static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 122#endif /* HAVE_PROTOBUF */
f26bf547
RG
123
124thread_local std::unique_ptr<MT_t> MT; // the big MTasker
125thread_local std::unique_ptr<MemRecursorCache> t_RC;
126thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 127thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 128thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 129thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 130thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
131#ifdef NOD_ENABLED
132thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 133thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 134#endif /* NOD_ENABLED */
d187038c 135__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 136
b243ca3b
RG
137typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
138
d7dae798 139// for communicating with our threads
b243ca3b
RG
140// effectively readonly after startup
141struct RecThreadInfo
142{
143 struct ThreadPipeSet
144 {
145 int writeToThread{-1};
146 int readToThread{-1};
147 int writeFromThread{-1};
148 int readFromThread{-1};
149 int writeQueriesToThread{-1}; // this one is non-blocking
150 int readQueriesToThread{-1};
151 };
152
adb6cd72 153 /* FD corresponding to TCP sockets this thread is listening
c47f201b 154 on.
adb6cd72
RG
155 These FDs are also in deferredAdds when we have one
156 socket per listener, and in g_deferredAdds instead. */
157 std::set<int> tcpSockets;
b243ca3b
RG
158 /* FD corresponding to listening sockets if we have one socket per
159 listener (with reuseport), otherwise all listeners share the
160 same FD and g_deferredAdds is then used instead */
161 deferredAdd_t deferredAdds;
162 struct ThreadPipeSet pipes;
163 std::thread thread;
144040be
RG
164 MT_t* mt{nullptr};
165 uint64_t numberOfDistributedQueries{0};
b243ca3b
RG
166 /* handle the web server, carbon, statistics and the control channel */
167 bool isHandler{false};
168 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
169 bool isListener{false};
170 /* process queries */
171 bool isWorker{false};
49a699c4 172};
810ff705 173
b243ca3b
RG
174/* first we have the handler thread, t_id == 0 (some other
175 helper threads like SNMP might have t_id == 0 as well)
176 then the distributor threads if any
177 and finally the workers */
178static std::vector<RecThreadInfo> s_threadInfos;
179/* without reuseport, all listeners share the same sockets */
180static deferredAdd_t g_deferredAdds;
faf580f5 181
d187038c
RG
182typedef vector<int> tcpListenSockets_t;
183typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 184
d187038c 185static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
d187038c 186static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
d187038c
RG
187static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
188static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
189static AtomicCounter counter;
9065eb05 190static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 191static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 192static NetmaskGroup g_XPFAcl;
d187038c 193static size_t g_tcpMaxQueriesPerConn;
a5886e6a 194static size_t s_maxUDPQueriesPerRound;
d187038c
RG
195static uint64_t g_latencyStatSize;
196static uint32_t g_disthashseed;
197static unsigned int g_maxTCPPerClient;
d187038c 198static unsigned int g_maxMThreads;
b243ca3b 199static unsigned int g_numDistributorThreads;
d187038c
RG
200static unsigned int g_numWorkerThreads;
201static int g_tcpTimeout;
202static uint16_t g_udpTruncationThreshold;
59cb4a79 203static uint16_t g_xpfRRCode{0};
d187038c
RG
204static std::atomic<bool> statsWanted;
205static std::atomic<bool> g_quiet;
206static bool g_logCommonErrors;
207static bool g_anyToTcp;
b243ca3b 208static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 209static bool g_reusePort{false};
00b8cadc 210static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 211static time_t g_statisticsInterval;
9065eb05 212static bool g_useIncomingECS;
c29d820c 213static bool g_useKernelTimestamp;
a6f7f5fe 214std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
215#ifdef NOD_ENABLED
216static bool g_nodEnabled;
217static DNSName g_nodLookupDomain;
218static bool g_nodLog;
219static SuffixMatchNode g_nodDomainWL;
ca2526f5 220static std::string g_nod_pbtag;
41c542ec
NC
221static bool g_udrEnabled;
222static bool g_udrLog;
ca2526f5 223static std::string g_udr_pbtag;
af1377b7 224#endif /* NOD_ENABLED */
f097141b 225#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 226static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
227#else
228static std::set<uint16_t> s_avoidUdpSourcePorts;
229#endif
bf6f28ca
CHB
230static uint16_t s_minUdpSourcePort;
231static uint16_t s_maxUdpSourcePort;
144040be 232static double s_balancingFactor;
49a699c4 233
b243ca3b 234RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 235RecursorStats g_stats;
2d733c0f 236string s_programname="pdns_recursor";
d187038c 237string s_pidfname;
c1c29961 238bool g_lowercaseOutgoing;
bf19ccfd 239unsigned int g_networkTimeoutMsec;
d187038c
RG
240unsigned int g_numThreads;
241uint16_t g_outgoingEDNSBufsize;
98d36505 242bool g_logRPZChanges{false};
c3828c03 243
559b6c93
PL
244// Used in the Syncres to not throttle certain servers
245GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
246GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
247
12cd44ee 248#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 249#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 250// Bad Nets taken from both:
3ddb9247 251// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 252// and
253// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
254// where such a network may not be considered a valid destination
255#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
256#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 257
d7dae798 258//! used to send information to a newborn mthread
ea634573 259struct DNSComboWriter {
08b02366 260 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
261 {
262 }
5cc8371b 263
08b02366 264 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_data(std::move(data))
5164bac3
RG
265 {
266 }
267
5cc8371b
RG
268 void setRemote(const ComboAddress& sa)
269 {
270 d_remote=sa;
271 }
272
273 void setSource(const ComboAddress& sa)
ea634573 274 {
5cc8371b 275 d_source=sa;
ea634573
BH
276 }
277
b71b60ee 278 void setLocal(const ComboAddress& sa)
279 {
280 d_local=sa;
281 }
282
5cc8371b
RG
283 void setDestination(const ComboAddress& sa)
284 {
285 d_destination=sa;
286 }
b71b60ee 287
ea634573
BH
288 void setSocket(int sock)
289 {
290 d_socket=sock;
291 }
a1754c6a
BH
292
293 string getRemote() const
294 {
5cc8371b
RG
295 if (d_source == d_remote) {
296 return d_source.toStringWithPort();
297 }
298 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
299 }
300
5cc8371b 301 MOADNSParser d_mdp;
c9e9e5e0 302 struct timeval d_now;
5cc8371b
RG
303 /* Remote client, might differ from d_source
304 in case of XPF, in which case d_source holds
305 the IP of the client and d_remote of the proxy
306 */
307 ComboAddress d_remote;
308 ComboAddress d_source;
309 /* Destination address, might differ from
310 d_destination in case of XPF, in which case
311 d_destination holds the IP of the proxy and
312 d_local holds our own. */
313 ComboAddress d_local;
314 ComboAddress d_destination;
aa7929a3
RG
315#ifdef HAVE_PROTOBUF
316 boost::uuids::uuid d_uuid;
67e31ebe 317 string d_requestorId;
590388d2 318 string d_deviceId;
ea4aa152 319 string d_deviceName;
c29d820c 320 struct timeval d_kernelTimestamp{0,0};
aa7929a3 321#endif
08b02366 322 std::string d_query;
5164bac3
RG
323 std::vector<std::string> d_policyTags;
324 LuaContext::LuaObject d_data;
b40562da 325 EDNSSubnetOpts d_ednssubnet;
5164bac3 326 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 327 int d_socket;
b673817a 328 unsigned int d_tag{0};
e9f63d47 329 uint32_t d_qhash{0};
70fb28d9 330 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
08b02366
RG
331 uint16_t d_ecsBegin{0};
332 uint16_t d_ecsEnd{0};
70fb28d9 333 bool d_variable{false};
5164bac3
RG
334 bool d_ecsFound{false};
335 bool d_ecsParsed{false};
336 bool d_tcp;
ea634573
BH
337};
338
06857845
RG
339MT_t* getMT()
340{
341 return MT ? MT.get() : nullptr;
342}
ea634573 343
288f4aa9
BH
344ArgvMap &arg()
345{
346 static ArgvMap theArg;
347 return theArg;
348}
4ef015cd 349
8fb594ba 350unsigned int getRecursorThreadId()
b4015453 351{
30da2030 352 return t_id;
b4015453 353}
09e6702a 354
30ee601a
RG
355int getMTaskerTID()
356{
357 return MT->getTid();
358}
359
b243ca3b
RG
360static bool isDistributorThread()
361{
362 if (t_id == 0) {
363 return false;
364 }
365
366 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
367}
368
369static bool isHandlerThread()
370{
371 if (t_id == 0) {
372 return true;
373 }
374
375 return s_threadInfos.at(t_id).isHandler;
376}
377
d187038c 378static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 379
50c81227 380// -1 is error, 0 is timeout, 1 is success
3ddb9247 381int asendtcp(const string& data, Socket* sock)
5c633640
BH
382{
383 PacketID pident;
384 pident.sock=sock;
385 pident.outMSG=data;
3ddb9247 386
bb4bdbaf 387 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 388 string packet;
5c633640 389
5b0ddd18 390 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 391
9170fbaf 392 if(!ret || ret==-1) { // timeout
bb4bdbaf 393 t_fdm->removeWriteFD(sock->getHandle());
5c633640 394 }
50c81227
BH
395 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
396 return -1;
397 }
9170fbaf 398 return ret;
5c633640
BH
399}
400
d187038c 401static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 402
9170fbaf 403// -1 is error, 0 is timeout, 1 is success
a683e8bd 404int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 405{
50c81227 406 data.clear();
5c633640
BH
407 PacketID pident;
408 pident.sock=sock;
409 pident.inNeeded=len;
825fa717 410 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 411 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 412
bb4bdbaf 413 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 414 if(!ret || ret==-1) { // timeout
bb4bdbaf 415 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 416 }
50c81227
BH
417 else if(data.empty()) {// error, EOF or other
418 return -1;
419 }
420
9170fbaf 421 return ret;
288f4aa9
BH
422}
423
d187038c 424static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 425{
fba1e944 426 PacketID pident=*any_cast<PacketID>(&var);
4465e941 427 char resp[512];
7c77ce63
RG
428 ComboAddress fromaddr;
429 socklen_t addrlen=sizeof(fromaddr);
430
431 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
432 if (fromaddr != pident.remote) {
e6a9dde5 433 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
434
435 }
436
4465e941 437 t_fdm->removeReadFD(fd);
438 if(ret >= 0) {
a683e8bd 439 string data(resp, (size_t) ret);
fba1e944 440 MT->sendEvent(pident, &data);
4465e941 441 }
442 else {
fba1e944 443 string empty;
444 MT->sendEvent(pident, &empty);
445 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 446 }
447}
fba1e944 448string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 449{
4465e941 450 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
451 s.setNonBlocking();
452 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
453
454 s.bind(local);
455 s.connect(dest);
4465e941 456 s.send(query);
457
458 PacketID pident;
459 pident.sock=&s;
7c77ce63 460 pident.remote=dest;
4465e941 461 pident.type=0;
fba1e944 462 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 463
464 string data;
fba1e944 465
4465e941 466 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 467
4465e941 468 if(!ret || ret==-1) { // timeout
4465e941 469 t_fdm->removeReadFD(s.getHandle());
470 }
471 else if(data.empty()) {// error, EOF or other
fba1e944 472 // we could special case this
4465e941 473 return data;
474 }
4465e941 475 return data;
476}
477
d7dae798 478//! pick a random query local address
1652a63e 479ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 480{
1652a63e 481 ComboAddress ret;
5a38281c 482 if(family==AF_INET) {
3ddb9247 483 if(g_localQueryAddresses4.empty())
1652a63e 484 ret = g_local4;
3ddb9247 485 else
1652a63e
BH
486 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
487 ret.sin4.sin_port = htons(port);
5a38281c
BH
488 }
489 else {
490 if(g_localQueryAddresses6.empty())
1652a63e
BH
491 ret = g_local6;
492 else
493 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 494
1652a63e 495 ret.sin6.sin6_port = htons(port);
5a38281c 496 }
1652a63e 497 return ret;
5a38281c 498}
4ef015cd 499
d187038c 500static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 501
d187038c 502static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
503{
504 uint32_t psize=0;
505 socklen_t len=sizeof(psize);
3ddb9247 506
d7dae798 507 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 508 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 509 return;
d7dae798
BH
510 }
511
512 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 513 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
514}
515
516
517static void setSocketReceiveBuffer(int fd, uint32_t size)
518{
519 setSocketBuffer(fd, SO_RCVBUF, size);
520}
521
522static void setSocketSendBuffer(int fd, uint32_t size)
523{
524 setSocketBuffer(fd, SO_SNDBUF, size);
525}
526
527
4ef015cd
BH
528// you can ask this class for a UDP socket to send a query from
529// this socket is not yours, don't even think about deleting it
530// but after you call 'returnSocket' on it, don't assume anything anymore
531class UDPClientSocks
532{
4ef015cd 533 unsigned int d_numsocks;
4ef015cd 534public:
e2642526 535 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
536 {
537 }
538
996c89cc 539 typedef set<int> socks_t;
4ef015cd
BH
540 socks_t d_socks;
541
2ee280cf 542 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 543 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 544 {
d8f6d49f
BH
545 *fd=makeClientSocket(toaddr.sin4.sin_family);
546 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 547 return -2;
d8f6d49f
BH
548
549 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
550 int err = errno;
41ff43f8 551 // returnSocket(*fd);
a7b68ae7
RG
552 try {
553 closesocket(*fd);
554 }
555 catch(const PDNSException& e) {
e6a9dde5 556 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
557 }
558
d8f6d49f 559 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 560 return -2;
998a4334 561 return -1;
d8f6d49f 562 }
998a4334 563
d8f6d49f 564 d_socks.insert(*fd);
998a4334 565 d_numsocks++;
d8f6d49f 566 return 0;
4ef015cd
BH
567 }
568
095c3045
BH
569 void returnSocket(int fd)
570 {
571 socks_t::iterator i=d_socks.find(fd);
34801ab1 572 if(i==d_socks.end()) {
335da0ba 573 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 574 }
bb4bdbaf 575 returnSocketLocked(i);
095c3045
BH
576 }
577
4ef015cd 578 // return a socket to the pool, or simply erase it
bb4bdbaf 579 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 580 {
600fc20b 581 if(i==d_socks.end()) {
3f81d239 582 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 583 }
80baf329 584 try {
bb4bdbaf 585 t_fdm->removeReadFD(*i);
80baf329
BH
586 }
587 catch(FDMultiplexerException& e) {
bb4bdbaf 588 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 589 }
a7b68ae7
RG
590 try {
591 closesocket(*i);
592 }
593 catch(const PDNSException& e) {
e6a9dde5 594 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 595 }
3ddb9247 596
998a4334
BH
597 d_socks.erase(i++);
598 --d_numsocks;
4ef015cd 599 }
d8f6d49f
BH
600
601 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 602 static int makeClientSocket(int family)
d8f6d49f 603 {
a683e8bd 604 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 605
d8f6d49f
BH
606 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
607 return ret;
3ddb9247
PD
608
609 if(ret<0)
335da0ba 610 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 611
7eb73ffa 612 // setCloseOnExec(ret); // we're not going to exec
5a38281c 613
d8f6d49f 614 int tries=10;
3aa91c3e 615 ComboAddress sin;
d8f6d49f 616 while(--tries) {
1652a63e 617 uint16_t port;
3ddb9247 618
d8f6d49f 619 if(tries==1) // fall back to kernel 'random'
4957a608 620 port = 0;
bf6f28ca
CHB
621 else {
622 do {
623 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
624 }
625 while (s_avoidUdpSourcePorts.count(port));
626 }
5a38281c 627
3aa91c3e 628 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 629
3ddb9247 630 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 631 break;
d8f6d49f
BH
632 }
633 if(!tries)
3aa91c3e 634 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 635
29bb743c 636 setReceiveSocketErrors(ret, family);
3897b9e1 637 setNonBlocking(ret);
d8f6d49f
BH
638 return ret;
639 }
49a699c4
BH
640};
641
f26bf547 642static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 643
288f4aa9 644/* these two functions are used by LWRes */
34801ab1 645// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 646int asendto(const char *data, size_t len, int flags,
3ddb9247 647 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 648{
34801ab1
BH
649
650 PacketID pident;
787e5eab
BH
651 pident.domain = domain;
652 pident.remote = toaddr;
653 pident.type = qtype;
34801ab1
BH
654
655 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
656 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
657
658 for(; chain.first != chain.second; chain.first++) {
659 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 660 /*
4665c31e
BH
661 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
662 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 663 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 664 */
34801ab1
BH
665 chain.first->key.chain.insert(id); // we can chain
666 *fd=-1; // gets used in waitEvent / sendEvent later on
667 return 1;
668 }
669 }
670
49a699c4 671 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
672 if(ret < 0)
673 return ret;
34801ab1 674
998a4334
BH
675 pident.fd=*fd;
676 pident.id=id;
3ddb9247 677
bb4bdbaf
BH
678 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
679 ret = send(*fd, data, len, 0);
680
5b0ddd18 681 int tmp = errno;
bb4bdbaf 682
7302ed0a 683 if(ret < 0)
49a699c4 684 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 685
5b0ddd18 686 errno = tmp; // this is for logging purposes only
7302ed0a 687 return ret;
288f4aa9
BH
688}
689
9170fbaf 690// -1 is error, 0 is timeout, 1 is success
f128d20d 691int arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 692 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 693{
0d5f0a9f 694 static optional<unsigned int> nearMissLimit;
3ddb9247 695 if(!nearMissLimit)
0d5f0a9f
BH
696 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
697
288f4aa9 698 PacketID pident;
4ef015cd 699 pident.fd=fd;
288f4aa9 700 pident.id=id;
0d5f0a9f 701 pident.domain=domain;
787e5eab 702 pident.type = qtype;
996c89cc 703 pident.remote=fromaddr;
b636533b 704
5b0ddd18 705 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 706
9170fbaf 707 if(ret > 0) {
996c89cc 708 if(packet.empty()) // means "error"
3ddb9247 709 return -1;
998a4334 710
a683e8bd 711 *d_len=packet.size();
f128d20d 712
0d5f0a9f 713 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 714 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 715 g_stats.spoofCount++;
35ce8576
BH
716 return -1;
717 }
288f4aa9 718 }
09e6702a 719 else {
34801ab1 720 if(fd >= 0)
49a699c4 721 t_udpclientsocks->returnSocket(fd);
09e6702a 722 }
9170fbaf 723 return ret;
288f4aa9
BH
724}
725
88def049
BH
726static void writePid(void)
727{
191f2e47 728 if(!::arg().mustDo("write-pid"))
729 return;
18e7758c 730 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 731 if(of)
705f31ae 732 of<< Utility::getpid() <<endl;
88def049 733 else
e6a9dde5 734 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
735}
736
2749c3fe 737TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
738{
739 ++s_currentConnections;
cd989c87 740 (*t_tcpClientCounts)[d_remote]++;
0e408828 741}
cd989c87
BH
742
743TCPConnection::~TCPConnection()
0e408828 744{
a7b68ae7
RG
745 try {
746 if(closesocket(d_fd) < 0)
e6a9dde5 747 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
748 }
749 catch(const PDNSException& e) {
e6a9dde5 750 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
751 }
752
3ddb9247 753 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 754 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 755 --s_currentConnections;
0e408828 756}
0e9d9ce2 757
3ddb9247 758AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
759
760static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 761
92011b8f 762// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 763static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 764{
92011b8f 765 if(packetsize > 1000 && t_largeanswerremotes)
766 t_largeanswerremotes->push_back(remote);
2cc13433
BH
767 switch(res) {
768 case RCode::ServFail:
92011b8f 769 if(t_servfailremotes) {
770 t_servfailremotes->push_back(remote);
5af86fdc 771 if(query && t_servfailqueryring) // packet cache
92011b8f 772 t_servfailqueryring->push_back(make_pair(*query, qtype));
773 }
2cc13433
BH
774 g_stats.servFails++;
775 break;
776 case RCode::NXDomain:
777 g_stats.nxDomains++;
778 break;
779 case RCode::NoError:
780 g_stats.noErrors++;
781 break;
782 }
783}
784
9a864da4 785static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 786try
787{
5cc8371b 788 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 789}
790catch(...)
791{
792 return "Exception making error message for exception";
793}
794
aa7929a3 795#ifdef HAVE_PROTOBUF
ea4aa152 796static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId, const std::string& deviceName)
aa7929a3 797{
b773359c
RG
798 if (!t_protobufServers) {
799 return;
800 }
801
e1c8a4bb
RG
802 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
803 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
804 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
c165308b 805 message.setServerIdentity(SyncRes::s_serverID);
a94bc5d7 806 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 807 message.setRequestorId(requestorId);
590388d2 808 message.setDeviceId(deviceId);
ea4aa152 809 message.setDeviceName(deviceName);
02b47f43 810
02b47f43 811 if (!policyTags.empty()) {
d9d3f9c1 812 message.setPolicyTags(policyTags);
02b47f43 813 }
aa7929a3 814
d9d3f9c1 815// cerr <<message.toDebugString()<<endl;
aa7929a3 816 std::string str;
d9d3f9c1 817 message.serialize(str);
b773359c
RG
818
819 for (auto& server : *t_protobufServers) {
820 server->queueData(str);
821 }
aa7929a3
RG
822}
823
b773359c 824static void protobufLogResponse(const RecProtoBufMessage& message)
aa7929a3 825{
b773359c
RG
826 if (!t_protobufServers) {
827 return;
828 }
829
d9d3f9c1 830// cerr <<message.toDebugString()<<endl;
aa7929a3 831 std::string str;
d9d3f9c1 832 message.serialize(str);
b773359c
RG
833
834 for (auto& server : *t_protobufServers) {
835 server->queueData(str);
836 }
aa7929a3
RG
837}
838#endif
839
53508135
PL
840/**
841 * Chases the CNAME provided by the PolicyCustom RPZ policy.
842 *
843 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
844 * @param qtype: The QType of the original query
845 * @param sr: A SyncRes
846 * @param res: An integer that will contain the RCODE of the lookup we do
847 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
848 */
d187038c 849static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
850{
851 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
852 bool oldWantsRPZ = sr.getWantsRPZ();
853 sr.setWantsRPZ(false);
53508135 854 vector<DNSRecord> ans;
6da513b2 855 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
856 for (const auto& rec : ans) {
857 if(rec.d_place == DNSResourceRecord::ANSWER) {
858 ret.push_back(rec);
859 }
860 }
861 // Reset the RPZ state of the SyncRes
30ee601a 862 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
863 }
864}
865
70fb28d9 866static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 867{
70fb28d9 868 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
869
870 if(rec.d_type != QType::OPT) // their TTL ain't real
871 minTTL = min(minTTL, rec.d_ttl);
872
873 rec.d_content->toPacket(pw);
874 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
875 pw.rollback();
876 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
877 pw.getHeader()->tc=1;
878 pw.truncate();
879 }
880 return false;
881 }
882
883 return true;
884}
885
63341e8d 886#ifdef HAVE_PROTOBUF
3fe06137 887static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 888{
3fe06137 889 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
890
891 for (const auto& server : config.servers) {
892 try {
4c044372
RG
893 auto logger = make_unique<RemoteLogger>(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
894 logger->setLogQueries(config.logQueries);
895 logger->setLogResponses(config.logResponses);
896 result->emplace_back(std::move(logger));
b773359c
RG
897 }
898 catch(const std::exception& e) {
899 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
900 }
901 catch(const PDNSException& e) {
902 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
903 }
63341e8d
RG
904 }
905
906 return result;
907}
908
909static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
910{
911 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
912 if (t_protobufServers) {
913 for (auto& server : *t_protobufServers) {
914 server->stop();
915 }
916 t_protobufServers.reset();
63341e8d
RG
917 }
918
919 return false;
920 }
921
922 /* if the server was not running, or if it was running according to a
923 previous configuration */
b773359c
RG
924 if (!t_protobufServers ||
925 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 926
b773359c
RG
927 if (t_protobufServers) {
928 for (auto& server : *t_protobufServers) {
929 server->stop();
930 }
63341e8d 931 }
b773359c 932 t_protobufServers.reset();
63341e8d 933
b773359c
RG
934 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
935 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
936 }
937
938 return true;
939}
940
941static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
942{
943 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
944 if (t_outgoingProtobufServers) {
945 for (auto& server : *t_outgoingProtobufServers) {
946 server->stop();
947 }
63341e8d 948 }
b773359c 949 t_outgoingProtobufServers.reset();
63341e8d
RG
950
951 return false;
952 }
953
954 /* if the server was not running, or if it was running according to a
955 previous configuration */
b773359c
RG
956 if (!t_outgoingProtobufServers ||
957 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 958
b773359c
RG
959 if (t_outgoingProtobufServers) {
960 for (auto& server : *t_outgoingProtobufServers) {
961 server->stop();
962 }
63341e8d 963 }
b773359c 964 t_outgoingProtobufServers.reset();
63341e8d 965
b773359c
RG
966 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
967 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
968 }
969
970 return true;
971}
972#endif /* HAVE_PROTOBUF */
973
af1377b7 974#ifdef NOD_ENABLED
41c542ec 975static bool nodCheckNewDomain(const DNSName& dname)
af1377b7
NC
976{
977 static const QType qt(QType::A);
978 static const uint16_t qc(QClass::IN);
41c542ec 979 bool ret = false;
af1377b7
NC
980 // First check the (sub)domain isn't whitelisted for NOD purposes
981 if (!g_nodDomainWL.check(dname)) {
982 // Now check the NODDB (note this is probablistic so can have FNs/FPs)
983 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
984 if (g_nodLog) {
985 // This should probably log to a dedicated log file
986 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname.toLogString()<<endl;
987 }
988 if (!(g_nodLookupDomain.isRoot())) {
989 // Send a DNS A query to <domain>.g_nodLookupDomain
990 DNSName qname = dname;
991 vector<DNSRecord> dummy;
992 qname += g_nodLookupDomain;
993 directResolve(qname, qt, qc, dummy);
994 }
41c542ec 995 ret = true;
af1377b7
NC
996 }
997 }
41c542ec 998 return ret;
af1377b7
NC
999}
1000
1001static void nodAddDomain(const DNSName& dname)
1002{
1003 // Don't bother adding domains on the nod whitelist
1004 if (!g_nodDomainWL.check(dname)) {
1005 if (t_nodDBp) {
1006 // This keeps the nod info up to date
1007 t_nodDBp->addDomain(dname);
1008 }
1009 }
1010}
41c542ec
NC
1011
1012static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1013{
1014 bool ret = false;
1015 if (record.d_place == DNSResourceRecord::ANSWER ||
1016 record.d_place == DNSResourceRecord::ADDITIONAL) {
1017 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1018 std::stringstream ss;
1019 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1020 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1021 if (g_udrLog) {
1022 // This should also probably log to a dedicated file.
1023 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname.toLogString()<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name.toLogString() << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1024 }
1025 ret = true;
1026 }
1027 }
1028 return ret;
1029}
af1377b7
NC
1030#endif /* NOD_ENABLED */
1031
d187038c 1032static void startDoResolve(void *p)
288f4aa9 1033{
9a864da4 1034 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1035 try {
5af86fdc
RG
1036 if (t_queryring)
1037 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1038
32015748 1039 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1040 EDNSOpts edo;
5164bac3 1041 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1042 bool variableAnswer = dc->d_variable;
8e079f3a 1043 bool haveEDNS=false;
ca2526f5
NC
1044#ifdef NOD_ENABLED
1045 bool hasUDR = false;
1046#endif /* NOD_ENABLED */
f1db0de2
PL
1047 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1048 uint8_t ednsExtRCode = 0;
8e079f3a 1049 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1050 haveEDNS=true;
1051 if (edo.d_version != 0) {
1052 ednsExtRCode = ERCode::BADVERS;
1053 }
1054
32015748
RG
1055 if(!dc->d_tcp) {
1056 /* rfc6891 6.2.3:
1057 "Values lower than 512 MUST be treated as equal to 512."
1058 */
1059 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1060 }
5164bac3 1061 ednsOpts = edo.d_options;
3af35968 1062 maxanswersize -= 11; // EDNS header size
b40562da 1063
1f691b94
PL
1064 for (const auto& o : edo.d_options) {
1065 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1066 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1067 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1068 const static string mode_server_id = ::arg()["server-id"];
8a42919a
PL
1069 if(mode_server_id != "disabled" && !mode_server_id.empty() &&
1070 maxanswersize > (2 + 2 + mode_server_id.size())) {
f1db0de2
PL
1071 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1072 variableAnswer = true; // Can't packetcache an answer with NSID
1073 // Option Code and Option Length are both 2
1074 maxanswersize -= 2 + 2 + mode_server_id.size();
1075 }
b40562da
RG
1076 }
1077 }
10321a98 1078 }
b40562da
RG
1079 /* perhaps there was no EDNS or no ECS but by now we looked */
1080 dc->d_ecsParsed = true;
e325f20c 1081 vector<DNSRecord> ret;
ea634573 1082 vector<uint8_t> packet;
b23b8614 1083
ad42489c 1084 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1085 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1086 bool wantsRPZ(true);
1fbc6dc5 1087 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
f1c7929a 1088 bool logResponse = false;
aa7929a3 1089#ifdef HAVE_PROTOBUF
63341e8d 1090 if (checkProtobufExport(luaconfsLocal)) {
b773359c 1091 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
5cc8371b 1092 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1093 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
0bd2e252 1094 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response, dc->d_uuid, &requestor, &dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.id, dc->d_tcp, 0);
c165308b 1095 pbMessage->setServerIdentity(SyncRes::s_serverID);
d362f7c1 1096 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
1097 }
1098#endif /* HAVE_PROTOBUF */
ad42489c 1099
3ddb9247 1100 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1101
1102 pw.getHeader()->aa=0;
1103 pw.getHeader()->ra=1;
c154c8a4 1104 pw.getHeader()->qr=1;
bb4bdbaf 1105 pw.getHeader()->tc=0;
ea634573 1106 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1107 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1108 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1109
70fb28d9
RG
1110 /* This is the lowest TTL seen in the records of the response,
1111 so we can't cache it for longer than this value.
1112 If we have a TTL cap, this value can't be larger than the
1113 cap no matter what. */
1114 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1115
1116 SyncRes sr(dc->d_now);
0c43f455 1117
2e921ec6 1118 bool DNSSECOK=false;
3457a2a0 1119 if(t_pdl) {
f26bf547 1120 sr.setLuaEngine(t_pdl);
3457a2a0 1121 }
9eec8c98 1122 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1123 sr.setDoDNSSEC(true);
9eec8c98
PL
1124
1125 // Does the requestor want DNSSEC records?
d6c335ab 1126 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1127 DNSSECOK=true;
1128 g_stats.dnssecQueries++;
1129 }
88c33dca
RG
1130 if (dc->d_mdp.d_header.cd) {
1131 /* Per rfc6840 section 5.9, "When processing a request with
1132 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1133 to return all response data, even data that has failed DNSSEC
1134 validation. */
1135 ++g_stats.dnssecCheckDisabledQueries;
1136 }
1137 if (dc->d_mdp.d_header.ad) {
1138 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1139 indicating that the requester understands and is interested in the
1140 value of the AD bit in the response. This allows a requester to
1141 indicate that it understands the AD bit without also requesting
1142 DNSSEC data via the DO bit. */
1143 ++g_stats.dnssecAuthenticDataQueries;
1144 }
9eec8c98
PL
1145 } else {
1146 // Ignore the client-set CD flag
1147 pw.getHeader()->cd=0;
5b9853c9 1148 }
0c43f455
RG
1149 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1150
4898a348 1151#ifdef HAVE_PROTOBUF
30ee601a 1152 sr.setInitialRequestId(dc->d_uuid);
b773359c 1153 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1154#endif
0c43f455 1155
2fe3354d 1156 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 1157
904d3219 1158 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1159 bool shouldNotValidate = false;
904d3219 1160
ef3b6cd7
RG
1161 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1162 int res = RCode::NoError;
1f1ca368 1163 DNSFilterEngine::Policy appliedPolicy;
6da513b2 1164 std::vector<DNSRecord> spoofed;
f1c7929a 1165 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, logResponse);
d6c335ab 1166 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1167 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1168 dq.tag = dc->d_tag;
1169 dq.discardedPolicies = &sr.d_discardedPolicies;
1170 dq.policyTags = &dc->d_policyTags;
1171 dq.appliedPolicy = &appliedPolicy;
1172 dq.currentRecords = &ret;
1173 dq.dh = &dc->d_mdp.d_header;
05c74122 1174 dq.data = dc->d_data;
67e31ebe
RG
1175#ifdef HAVE_PROTOBUF
1176 dq.requestorId = dc->d_requestorId;
590388d2 1177 dq.deviceId = dc->d_deviceId;
ea4aa152 1178 dq.deviceName = dc->d_deviceName;
67e31ebe 1179#endif
ba21fcfe 1180
6cf96227
PL
1181 if(ednsExtRCode != 0) {
1182 goto sendit;
1183 }
1184
e661a20b 1185 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1186 pw.getHeader()->tc = 1;
1187 res = 0;
1188 variableAnswer = true;
e661a20b
PD
1189 goto sendit;
1190 }
1191
f26bf547 1192 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1193 sr.setLogMode(SyncRes::Store);
1194 tracedQuery=true;
1195 }
3ddb9247 1196
8f7473d7 1197
976ec823 1198 if(!g_quiet || tracedQuery) {
e6a9dde5 1199 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1200 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1201 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1202 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1203 }
e6a9dde5 1204 g_log<<endl;
976ec823 1205 }
c75a6a9e 1206
fededf47 1207 sr.setId(MT->getTid());
67828389 1208 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1209 sr.setCacheOnly();
1210
f26bf547
RG
1211 if (t_pdl) {
1212 t_pdl->prerpz(dq, res);
0a273054
RG
1213 }
1214
db486de5 1215 // Check if the query has a policy attached to it
0a273054 1216 if (wantsRPZ) {
5cc8371b 1217 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1218 }
644dd1da 1219
54be222b 1220 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1221 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1222
30ee601a 1223 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1224 if(wantsRPZ) {
1225 switch(appliedPolicy.d_kind) {
1226 case DNSFilterEngine::PolicyKind::NoAction:
1227 break;
1228 case DNSFilterEngine::PolicyKind::Drop:
1229 g_stats.policyDrops++;
7a25883a 1230 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1231 return;
1232 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1233 g_stats.policyResults[appliedPolicy.d_kind]++;
1234 res=RCode::NXDomain;
1235 goto haveAnswer;
1236 case DNSFilterEngine::PolicyKind::NODATA:
1237 g_stats.policyResults[appliedPolicy.d_kind]++;
1238 res=RCode::NoError;
db486de5 1239 goto haveAnswer;
b8470add
PL
1240 case DNSFilterEngine::PolicyKind::Custom:
1241 g_stats.policyResults[appliedPolicy.d_kind]++;
1242 res=RCode::NoError;
6da513b2
RG
1243 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1244 for (const auto& dr : spoofed) {
1245 ret.push_back(dr);
1246 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1247 }
b8470add
PL
1248 goto haveAnswer;
1249 case DNSFilterEngine::PolicyKind::Truncate:
1250 if(!dc->d_tcp) {
1251 g_stats.policyResults[appliedPolicy.d_kind]++;
1252 res=RCode::NoError;
1253 pw.getHeader()->tc=1;
1254 goto haveAnswer;
1255 }
1256 break;
1257 }
db486de5
PL
1258 }
1259
b8470add 1260 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1261 try {
1262 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1263 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1264 }
1265 catch(ImmediateServFailException &e) {
854d44e3 1266 if(g_logCommonErrors)
e6a9dde5 1267 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1268 res = RCode::ServFail;
1269 }
4485aa35 1270
1921a4c2
RG
1271 dq.validationState = sr.getValidationState();
1272
b8470add
PL
1273 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1274 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1275 appliedPolicy = sr.d_appliedPolicy;
1276 g_stats.policyResults[appliedPolicy.d_kind]++;
1277 switch(appliedPolicy.d_kind) {
1278 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1279 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1280 case DNSFilterEngine::PolicyKind::Drop:
1281 g_stats.policyDrops++;
b8470add
PL
1282 return;
1283 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1284 ret.clear();
1285 res=RCode::NXDomain;
1286 goto haveAnswer;
1287
1288 case DNSFilterEngine::PolicyKind::NODATA:
1289 ret.clear();
1290 res=RCode::NoError;
1291 goto haveAnswer;
1292
1293 case DNSFilterEngine::PolicyKind::Truncate:
1294 if(!dc->d_tcp) {
1295 ret.clear();
1296 res=RCode::NoError;
1297 pw.getHeader()->tc=1;
1298 goto haveAnswer;
1299 }
1300 break;
1301
1302 case DNSFilterEngine::PolicyKind::Custom:
1303 ret.clear();
1304 res=RCode::NoError;
6da513b2
RG
1305 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1306 for (const auto& dr : spoofed) {
1307 ret.push_back(dr);
1308 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1309 }
b8470add
PL
1310 goto haveAnswer;
1311 }
1312 }
1313
1314 if (wantsRPZ) {
1f1ca368 1315 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1316 }
db486de5 1317
f26bf547 1318 if(t_pdl) {
db486de5
PL
1319 if(res == RCode::NoError) {
1320 auto i=ret.cbegin();
1321 for(; i!= ret.cend(); ++i)
1322 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1323 break;
f26bf547 1324 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1325 shouldNotValidate = true;
1326
db486de5 1327 }
f26bf547 1328 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1329 shouldNotValidate = true;
db486de5 1330
f26bf547 1331 if(t_pdl->postresolve(dq, res))
3ca4e735 1332 shouldNotValidate = true;
db486de5
PL
1333 }
1334
b8470add
PL
1335 if (wantsRPZ) { //XXX This block is repeated, see above
1336 g_stats.policyResults[appliedPolicy.d_kind]++;
1337 switch(appliedPolicy.d_kind) {
1338 case DNSFilterEngine::PolicyKind::NoAction:
1339 break;
1340 case DNSFilterEngine::PolicyKind::Drop:
1341 g_stats.policyDrops++;
b8470add
PL
1342 return;
1343 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1344 ret.clear();
1345 res=RCode::NXDomain;
1346 goto haveAnswer;
1347
1348 case DNSFilterEngine::PolicyKind::NODATA:
1349 ret.clear();
1350 res=RCode::NoError;
1351 goto haveAnswer;
1352
1353 case DNSFilterEngine::PolicyKind::Truncate:
1354 if(!dc->d_tcp) {
1355 ret.clear();
1356 res=RCode::NoError;
1357 pw.getHeader()->tc=1;
1358 goto haveAnswer;
1359 }
1360 break;
1361
1362 case DNSFilterEngine::PolicyKind::Custom:
1363 ret.clear();
1364 res=RCode::NoError;
6da513b2
RG
1365 spoofed=appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1366 for (const auto& dr : spoofed) {
1367 ret.push_back(dr);
1368 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1369 }
b8470add
PL
1370 goto haveAnswer;
1371 }
644dd1da 1372 }
4485aa35 1373 }
644dd1da 1374 haveAnswer:;
3e8216c8 1375 if(res == PolicyDecision::DROP) {
e9c2ad3a 1376 g_stats.policyDrops++;
ae7e77ad 1377 return;
3ddb9247 1378 }
9cdfab64 1379 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1380 {
85ffbc53
PD
1381 string trace(sr.getTrace());
1382 if(!trace.empty()) {
1383 vector<string> lines;
1384 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1385 for(const string& line : lines) {
85ffbc53 1386 if(!line.empty())
e6a9dde5 1387 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1388 }
1389 }
1390 }
3ddb9247 1391
9cdfab64 1392 if(res == -1) {
0fe1d080
PD
1393 pw.getHeader()->rcode=RCode::ServFail;
1394 // no commit here, because no record
1395 g_stats.servFails++;
1396 }
288f4aa9 1397 else {
ea634573 1398 pw.getHeader()->rcode=res;
92011b8f 1399
f3fe4ae6 1400 // Does the validation mode or query demand validation?
0c43f455 1401 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1402 try {
f3fe4ae6 1403 if(sr.doLog()) {
e6a9dde5 1404 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1405 }
4d2be65d
RG
1406
1407 auto state = sr.getValidationState();
1408
b25cae9a 1409 if(state == Secure) {
2e921ec6 1410 if(sr.doLog()) {
e6a9dde5 1411 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1412 }
b25cae9a 1413
1414 // Is the query source interested in the value of the ad-bit?
885c8881 1415 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1416 pw.getHeader()->ad=1;
1417 }
1418 else if(state == Insecure) {
f3fe4ae6 1419 if(sr.doLog()) {
e6a9dde5 1420 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1421 }
b25cae9a 1422
1423 pw.getHeader()->ad=0;
f3fe4ae6 1424 }
b25cae9a 1425 else if(state == Bogus) {
66f2e6ad
KM
1426 if(t_bogusremotes)
1427 t_bogusremotes->push_back(dc->d_source);
1428 if(t_bogusqueryring)
1429 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1430 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1431 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1432 }
1433
1434 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1435 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1436 if(sr.doLog()) {
e6a9dde5 1437 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1438 }
1439
1440 pw.getHeader()->rcode=RCode::ServFail;
1441 goto sendit;
1442 } else {
1443 if(sr.doLog()) {
e6a9dde5 1444 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1445 }
1446 }
1447 }
1448 }
1449 catch(ImmediateServFailException &e) {
1450 if(g_logCommonErrors)
e6a9dde5 1451 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1452 pw.getHeader()->rcode=RCode::ServFail;
1453 goto sendit;
f3fe4ae6 1454 }
b3f0ed10 1455 }
1456
c154c8a4 1457 if(ret.size()) {
92476c8b 1458 orderAndShuffle(ret);
5cc8371b 1459 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1460 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1461 variableAnswer=true;
1462 }
8e079f3a 1463 }
0afa32d4
RG
1464
1465 bool needCommit = false;
8e079f3a 1466 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1467 if( ! DNSSECOK &&
1468 ( i->d_type == QType::NSEC3 ||
1469 (
1470 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1471 (
1472 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1473 i->d_place != DNSResourceRecord::ANSWER
1474 )
1475 )
1476 )
1477 ) {
2e921ec6 1478 continue;
3e80ebce
KM
1479 }
1480
70fb28d9 1481 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1482 needCommit = false;
1483 break;
1484 }
1485 needCommit = true;
1486
41c542ec
NC
1487#ifdef NOD_ENABLED
1488 bool udr = false;
1489 if (g_udrEnabled) {
1490 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1491 if (!hasUDR && udr)
1492 hasUDR = true;
41c542ec
NC
1493 }
1494#endif /* NOD ENABLED */
1495
aa7929a3 1496#ifdef HAVE_PROTOBUF
b773359c 1497 if (t_protobufServers) {
41c542ec
NC
1498#ifdef NOD_ENABLED
1499 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1500#else
0bd2e252 1501 pbMessage->addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes);
41c542ec 1502#endif /* NOD_ENABLED */
aa7929a3
RG
1503 }
1504#endif
ea634573 1505 }
0afa32d4 1506 if(needCommit)
8e079f3a 1507 pw.commit();
288f4aa9 1508 }
10321a98 1509 sendit:;
b3f0ed10 1510
a0ddd130 1511 if(g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
9837850d 1512 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
5a7f99b4 1513 EDNSSubnetOpts eo;
1514 eo.source = dc->d_ednssubnet.source;
1515 ComboAddress sa;
1ef18cab 1516 sa.reset();
5a7f99b4 1517 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1518 eo.scope = Netmask(sa, 0);
1519
1520 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, makeEDNSSubnetOptsString(eo)));
1521 }
1522
97c6d7e5
RG
1523 if (haveEDNS) {
1524 /* we try to add the EDNS OPT RR even for truncated answers,
1525 as rfc6891 states:
1526 "The minimal response MUST be the DNS header, question section, and an
1527 OPT record. This MUST also occur when a truncated response (using
1528 the DNS header's TC bit) is returned."
1529 */
9b60fb71 1530 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1531 pw.commit();
97c6d7e5
RG
1532 }
1533
79332bff 1534 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1535 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1536#ifdef NOD_ENABLED
1537 bool nod = false;
1538 if (g_nodEnabled) {
1539 if (nodCheckNewDomain(dc->d_mdp.d_qname))
1540 nod = true;
1541 }
1542#endif /* NOD_ENABLED */
aa7929a3 1543#ifdef HAVE_PROTOBUF
b773359c 1544 if (t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && (!appliedPolicy.d_name || appliedPolicy.d_name->empty()) && dc->d_policyTags.empty())) {
d362f7c1
RG
1545 pbMessage->setBytes(packet.size());
1546 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1547 if (appliedPolicy.d_name) {
d362f7c1
RG
1548 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1549 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1550 }
d362f7c1 1551 pbMessage->setPolicyTags(dc->d_policyTags);
c29d820c
RG
1552 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1553 pbMessage->setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1554 }
1555 else {
1556 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1557 }
d362f7c1
RG
1558 pbMessage->setRequestorId(dq.requestorId);
1559 pbMessage->setDeviceId(dq.deviceId);
ea4aa152 1560 pbMessage->setDeviceName(dq.deviceName);
41c542ec
NC
1561#ifdef NOD_ENABLED
1562 if (g_nodEnabled) {
ca2526f5 1563 if (nod) {
41c542ec 1564 pbMessage->setNOD(true);
ca2526f5
NC
1565 pbMessage->addPolicyTag(g_nod_pbtag);
1566 }
1567 if (hasUDR) {
1568 pbMessage->addPolicyTag(g_udr_pbtag);
1569 }
41c542ec
NC
1570 }
1571#endif /* NOD_ENABLED */
b773359c 1572 protobufLogResponse(*pbMessage);
ac238ea7 1573#ifdef NOD_ENABLED
ca2526f5
NC
1574 if (g_nodEnabled) {
1575 pbMessage->setNOD(false);
1576 pbMessage->clearUDR();
1577 if (nod)
1578 pbMessage->removePolicyTag(g_nod_pbtag);
1579 if (hasUDR)
1580 pbMessage->removePolicyTag(g_udr_pbtag);
1581 }
ac238ea7 1582#endif /* NOD_ENABLED */
aa7929a3
RG
1583 }
1584#endif
ea634573 1585 if(!dc->d_tcp) {
b71b60ee 1586 struct msghdr msgh;
1587 struct iovec iov;
1588 char cbuf[256];
1589 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1590 msgh.msg_control=NULL;
1591
cbc03320 1592 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1593 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1594 }
cbc03320 1595 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1596 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1597
49dc532e 1598 if(variableAnswer || sr.wasVariable()) {
1ef18cab 1599 g_stats.variableResponses++;
49dc532e 1600 }
3762e821 1601 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 1602 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1603 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1604 g_now.tv_sec,
76e2b9e3 1605 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1606 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 1607 dq.validationState,
08b02366
RG
1608 dc->d_ecsBegin,
1609 dc->d_ecsEnd,
4b0bdd5f 1610 std::move(pbMessage));
1051f8a9 1611 }
3762e821 1612 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1613 }
9c495589
BH
1614 else {
1615 char buf[2];
ea634573
BH
1616 buf[0]=packet.size()/256;
1617 buf[1]=packet.size()%256;
feccc9fc 1618
c038218b 1619 Utility::iovec iov[2];
feccc9fc 1620
ea634573
BH
1621 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1622 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1623
dd079764 1624 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1625 bool hadError=true;
feccc9fc 1626
dd079764 1627 if(wret == 0)
e6a9dde5 1628 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1629 else if(wret < 0 )
e6a9dde5 1630 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1631 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1632 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1633 else
18af64a8 1634 hadError=false;
3ddb9247 1635
09e6702a 1636 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1637
09e6702a 1638 if(hadError) {
18af64a8 1639 // no need to remove us from FDM, we weren't there
c36bc97a 1640 dc->d_socket = -1;
09e6702a 1641 }
a6ae6414 1642 else {
fde296a3
RG
1643 dc->d_tcpConnection->queriesCount++;
1644 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1645 dc->d_socket = -1;
1646 }
1647 else {
1648 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1649 Utility::gettimeofday(&g_now, 0); // needs to be updated
27ae2e3c
RG
1650 struct timeval ttd = g_now;
1651 ttd.tv_sec += g_tcpTimeout;
1652
1653 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
fde296a3 1654 }
0e9d9ce2 1655 }
9c495589 1656 }
2c9119cd 1657 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1658 if(!g_quiet) {
e6a9dde5
PL
1659 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1660 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1661 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1662 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1663
1664 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1665 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1666 }
1667
e6a9dde5 1668 g_log<<endl;
2c9119cd 1669
c75a6a9e 1670 }
b23b8614 1671
f7b8cffa
RG
1672 if (sr.d_outqueries || sr.d_authzonequeries) {
1673 t_RC->cacheMisses++;
1674 }
1675 else {
1676 t_RC->cacheHits++;
1677 }
2c9119cd 1678
fe213470
BH
1679 if(spent < 0.001)
1680 g_stats.answers0_1++;
1681 else if(spent < 0.010)
1682 g_stats.answers1_10++;
1683 else if(spent < 0.1)
1684 g_stats.answers10_100++;
1685 else if(spent < 1.0)
1686 g_stats.answers100_1000++;
1687 else
1688 g_stats.answersSlow++;
1689
574af7ea 1690 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1691 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1692 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1693 // no worries, we do this for packet cache hits elsewhere
19178da9 1694
1695 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1696 if(ourtime < 1)
1697 g_stats.ourtime0_1++;
1698 else if(ourtime < 2)
1699 g_stats.ourtime1_2++;
1700 else if(ourtime < 4)
1701 g_stats.ourtime2_4++;
1702 else if(ourtime < 8)
1703 g_stats.ourtime4_8++;
1704 else if(ourtime < 16)
1705 g_stats.ourtime8_16++;
1706 else if(ourtime < 32)
1707 g_stats.ourtime16_32++;
1708 else {
1709 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1710 g_stats.ourtimeSlow++;
1711 }
042da1a1 1712 if(ourtime >= 0.0) {
1713 newLat=ourtime*1000; // usec
1714 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1715 }
c6d04bdc 1716 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 1717 }
3f81d239 1718 catch(PDNSException &ae) {
e6a9dde5 1719 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 1720 }
16ce7f18
JS
1721 catch(const MOADNSException &mde) {
1722 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 1723 }
fdbf35ac 1724 catch(std::exception& e) {
e6a9dde5 1725 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1726
1727 // Luawrapper nests the exception from Lua, so we unnest it here
1728 try {
1729 std::rethrow_if_nested(e);
2010ac95 1730 } catch(const std::exception& ne) {
e6a9dde5 1731 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1732 } catch(...) {}
1733
e6a9dde5 1734 g_log<<endl;
c154c8a4 1735 }
288f4aa9 1736 catch(...) {
e6a9dde5 1737 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1738 }
3ddb9247 1739
ec6eacbc 1740 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1741}
1742
d187038c 1743static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1744{
2d733c0f 1745 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1746 if(processNum >= 0)
335da0ba 1747 sockname += "."+std::to_string(processNum);
677e2a46 1748 sockname+=".controlsocket";
41f7a068 1749 s_rcc.listen(sockname);
3ddb9247 1750
387de317
BH
1751 int sockowner = -1;
1752 int sockgroup = -1;
1753
1754 if (!::arg().isEmpty("socket-group"))
1755 sockgroup=::arg().asGid("socket-group");
1756 if (!::arg().isEmpty("socket-owner"))
1757 sockowner=::arg().asUid("socket-owner");
3ddb9247 1758
f838ad8d
BH
1759 if (sockgroup > -1 || sockowner > -1) {
1760 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1761 unixDie("Failed to chown control socket");
1762 }
1763 }
387de317
BH
1764
1765 // do mode change if socket-mode is given
1766 if(!::arg().isEmpty("socket-mode")) {
1767 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1768 if(chmod(sockname.c_str(), sockmode) < 0) {
1769 unixDie("Failed to chmod control socket");
1770 }
387de317 1771 }
1d5b3ce6
BH
1772}
1773
5cc8371b 1774static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 1775 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 1776 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1777{
59cb4a79 1778 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1779 const bool lookForECS = ednssubnet != nullptr;
1780 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1781 size_t questionLen = question.length();
1782 unsigned int consumed=0;
1783 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1784
1785 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1786 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1787 const uint16_t arcount = ntohs(dh->arcount);
1788
1789 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1790 if (question.at(pos) != 0) {
1791 /* not an OPT or a XPF, bye. */
1792 return;
1793 }
1794
1795 pos += 1;
1796 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1797 pos += sizeof(dnsrecordheader);
1798
1799 if (pos >= questionLen) {
1800 return;
1801 }
1802
02b47f43 1803 /* OPT root label (1) followed by type (2) */
5cc8371b 1804 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1805 if (!options) {
1806 char* ecsStart = nullptr;
1807 size_t ecsLen = 0;
5cc8371b
RG
1808 /* we need to pass the record len */
1809 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1810 if (res == 0 && ecsLen > 4) {
1811 EDNSSubnetOpts eso;
1812 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1813 *ednssubnet=eso;
5cc8371b 1814 foundECS = true;
00b8cadc
RG
1815 }
1816 }
1817 }
1818 else {
5cc8371b
RG
1819 /* we need to pass the record len */
1820 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1821 if (res == 0) {
1822 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 1823 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 1824 EDNSSubnetOpts eso;
29e6303a 1825 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 1826 *ednssubnet=eso;
5cc8371b 1827 foundECS = true;
00b8cadc
RG
1828 }
1829 }
02b47f43
RG
1830 }
1831 }
1832 }
59cb4a79 1833 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1834 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1835 return;
1836 }
1837
1838 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1839 }
1840
1841 pos += ntohs(drh->d_clen);
02b47f43
RG
1842 }
1843}
1844
d187038c 1845static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1846{
cd989c87 1847 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1848
879b3f70 1849 if(conn->state==TCPConnection::BYTE0) {
2749c3fe 1850 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 1851 if(bytes==1)
667f7e60 1852 conn->state=TCPConnection::BYTE1;
3ddb9247 1853 if(bytes==2) {
a0aa4f64 1854 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1855 conn->data.resize(conn->qlen);
667f7e60
BH
1856 conn->bytesread=0;
1857 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1858 }
1859 if(!bytes || bytes < 0) {
bb4bdbaf 1860 t_fdm->removeReadFD(fd);
09e6702a
BH
1861 return;
1862 }
1863 }
667f7e60 1864 else if(conn->state==TCPConnection::BYTE1) {
2749c3fe 1865 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 1866 if(bytes==1) {
667f7e60 1867 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1868 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 1869 conn->data.resize(conn->qlen);
667f7e60 1870 conn->bytesread=0;
09e6702a
BH
1871 }
1872 if(!bytes || bytes < 0) {
1873 if(g_logCommonErrors)
e6a9dde5 1874 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1875 t_fdm->removeReadFD(fd);
09e6702a
BH
1876 return;
1877 }
1878 }
667f7e60 1879 else if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 1880 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
f9d67b41 1881 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19
RG
1882 if(g_logCommonErrors) {
1883 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
1884 }
bb4bdbaf 1885 t_fdm->removeReadFD(fd);
09e6702a
BH
1886 return;
1887 }
b841314c 1888 conn->bytesread+=(uint16_t)bytes;
667f7e60 1889 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1890 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1891
9a864da4 1892 std::unique_ptr<DNSComboWriter> dc;
09e6702a 1893 try {
9a864da4 1894 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 1895 }
16ce7f18 1896 catch(const MOADNSException &mde) {
3ddb9247 1897 g_stats.clientParseError++;
4957a608 1898 if(g_logCommonErrors)
e6a9dde5 1899 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1900 return;
09e6702a 1901 }
cd989c87
BH
1902 dc->d_tcpConnection = conn; // carry the torch
1903 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1904 dc->d_tcp=true;
5cc8371b
RG
1905 dc->setRemote(conn->d_remote);
1906 dc->setSource(conn->d_remote);
a6147cd2 1907 ComboAddress dest;
d38e2ba9 1908 dest.reset();
a6147cd2 1909 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1910 socklen_t len = dest.getSocklen();
1911 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1912 dc->setLocal(dest);
5cc8371b 1913 dc->setDestination(dest);
33dcceba
RG
1914 DNSName qname;
1915 uint16_t qtype=0;
1916 uint16_t qclass=0;
1917 bool needECS = false;
5cc8371b 1918 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1919 string requestorId;
590388d2 1920 string deviceId;
ea4aa152 1921 string deviceName;
16bbc6e3 1922 bool logQuery = false;
aa7929a3 1923#ifdef HAVE_PROTOBUF
02b47f43 1924 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1925 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1926 needECS = true;
1927 }
b773359c 1928 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
33dcceba
RG
1929#endif
1930
70fb28d9 1931 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
1932
1933 try {
29e6303a 1934 EDNSOptionViewMap ednsOptions;
5cc8371b 1935 bool xpfFound = false;
b40562da 1936 dc->d_ecsParsed = true;
5cc8371b 1937 dc->d_ecsFound = false;
2749c3fe 1938 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
1939 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1940 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 1941
70fb28d9 1942 if(t_pdl) {
33dcceba 1943 try {
70fb28d9 1944 if (t_pdl->d_gettag_ffi) {
ea4aa152 1945 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName, dc->d_ttlCap, dc->d_variable, logQuery);
70fb28d9
RG
1946 }
1947 else if (t_pdl->d_gettag) {
ea4aa152 1948 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName);
70fb28d9 1949 }
33dcceba 1950 }
70fb28d9 1951 catch(const std::exception& e) {
33dcceba 1952 if(g_logCommonErrors)
e6a9dde5 1953 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1954 }
1955 }
1956 }
70fb28d9 1957 catch(const std::exception& e)
33dcceba
RG
1958 {
1959 if(g_logCommonErrors)
e6a9dde5 1960 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1961 }
1962 }
f52177c3
RG
1963
1964 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
1965
33dcceba 1966#ifdef HAVE_PROTOBUF
b773359c 1967 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 1968 dc->d_requestorId = requestorId;
590388d2 1969 dc->d_deviceId = deviceId;
ea4aa152 1970 dc->d_deviceName = deviceName;
d61aa945 1971 dc->d_uuid = getUniqueID();
4898a348 1972 }
02b47f43 1973
b773359c 1974 if(t_protobufServers) {
02b47f43 1975 try {
02b47f43 1976
845cbf4c 1977 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
ea4aa152 1978 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId, dc->d_deviceName);
b790ef3d 1979 }
02b47f43
RG
1980 }
1981 catch(std::exception& e) {
1982 if(g_logCommonErrors)
e6a9dde5 1983 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
1984 }
1985 }
aa7929a3 1986#endif
5034517a
RG
1987 if(t_pdl) {
1988 if(t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
1989 if(!g_quiet)
1990 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
1991 g_stats.policyDrops++;
1992 return;
1993 }
1994 }
1995
879b3f70 1996 if(dc->d_mdp.d_header.qr) {
048f5db6 1997 g_stats.ignoredCount++;
c0f9be19
RG
1998 if(g_logCommonErrors) {
1999 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2000 }
4957a608 2001 return;
879b3f70 2002 }
3abcdab2 2003 if(dc->d_mdp.d_header.opcode) {
048f5db6 2004 g_stats.ignoredCount++;
c0f9be19
RG
2005 if(g_logCommonErrors) {
2006 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2007 }
c0f9be19
RG
2008 return;
2009 }
2010 else if (dh->qdcount == 0) {
2011 g_stats.emptyQueriesCount++;
2012 if(g_logCommonErrors) {
2013 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2014 }
3abcdab2
PD
2015 return;
2016 }
09e6702a 2017 else {
4957a608
BH
2018 ++g_stats.qcounter;
2019 ++g_stats.tcpqcounter;
9a864da4 2020 MT->makeThread(startDoResolve, dc.release()); // deletes dc, will set state to BYTE0 again
4957a608 2021 return;
09e6702a
BH
2022 }
2023 }
2024 }
2025}
2026
6dcd28c3 2027//! Handle new incoming TCP connection
d187038c 2028static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2029{
37d3f960 2030 ComboAddress addr;
09e6702a 2031 socklen_t addrlen=sizeof(addr);
a683e8bd 2032 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2033 if(newsock>=0) {
85c32340
BH
2034 if(MT->numProcesses() > g_maxMThreads) {
2035 g_stats.overCapacityDrops++;
a7b68ae7
RG
2036 try {
2037 closesocket(newsock);
2038 }
2039 catch(const PDNSException& e) {
e6a9dde5 2040 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2041 }
85c32340
BH
2042 return;
2043 }
2044
92011b8f 2045 if(t_remotes)
2046 t_remotes->push_back(addr);
49a699c4 2047 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 2048 if(!g_quiet)
e6a9dde5 2049 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2050
09e6702a 2051 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2052 try {
2053 closesocket(newsock);
2054 }
2055 catch(const PDNSException& e) {
e6a9dde5 2056 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2057 }
09e6702a
BH
2058 return;
2059 }
bd0289fc 2060 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2061 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2062 try {
2063 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2064 }
2065 catch(const PDNSException& e) {
e6a9dde5 2066 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2067 }
09e6702a
BH
2068 return;
2069 }
3ddb9247 2070
3897b9e1 2071 setNonBlocking(newsock);
f26bf547 2072 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 2073 tc->state=TCPConnection::BYTE0;
3ddb9247 2074
27ae2e3c
RG
2075 struct timeval ttd;
2076 Utility::gettimeofday(&ttd, 0);
2077 ttd.tv_sec += g_tcpTimeout;
c038218b 2078
27ae2e3c 2079 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
09e6702a
BH
2080 }
2081}
3ddb9247 2082
d187038c 2083static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 2084{
183eb877 2085 gettimeofday(&g_now, 0);
c29d820c
RG
2086 if (tv.tv_sec) {
2087 struct timeval diff = g_now - tv;
2088 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2089
c29d820c
RG
2090 if(delta > 1000.0) {
2091 g_stats.tooOldDrops++;
2092 return nullptr;
2093 }
b71b60ee 2094 }
2095
1bc3c142 2096 ++g_stats.qcounter;
d7f10541
BH
2097 if(fromaddr.sin4.sin_family==AF_INET6)
2098 g_stats.ipv6qcounter++;
1bc3c142
BH
2099
2100 string response;
93f0da94 2101 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2102 unsigned int ctag=0;
f57486f1 2103 uint32_t qhash = 0;
12aff2e5 2104 bool needECS = false;
5cc8371b 2105 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 2106 std::vector<std::string> policyTags;
5fd2577f 2107 LuaContext::LuaObject data;
5cc8371b
RG
2108 ComboAddress source = fromaddr;
2109 ComboAddress destination = destaddr;
67e31ebe 2110 string requestorId;
590388d2 2111 string deviceId;
ea4aa152 2112 string deviceName;
16bbc6e3 2113 bool logQuery = false;
12aff2e5 2114#ifdef HAVE_PROTOBUF
02b47f43 2115 boost::uuids::uuid uniqueId;
02b47f43 2116 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2117 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2118 uniqueId = getUniqueID();
02b47f43 2119 needECS = true;
63341e8d 2120 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2121 uniqueId = getUniqueID();
02b47f43 2122 }
b773359c
RG
2123 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
2124 bool logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
12aff2e5 2125#endif
b40562da
RG
2126 EDNSSubnetOpts ednssubnet;
2127 bool ecsFound = false;
2128 bool ecsParsed = false;
08b02366
RG
2129 uint16_t ecsBegin = 0;
2130 uint16_t ecsEnd = 0;
70fb28d9
RG
2131 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2132 bool variable = false;
1bc3c142 2133 try {
02b47f43
RG
2134 DNSName qname;
2135 uint16_t qtype=0;
2136 uint16_t qclass=0;
1bc3c142 2137 uint32_t age;
c15ff3df 2138 bool qnameParsed=false;
8f7473d7 2139#ifdef MALLOC_TRACE
2140 /*
2141 static uint64_t last=0;
2142 if(!last)
2143 g_mtracer->clearAllocators();
2144 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2145 last=g_mtracer->getAllocs();
2146 cout<<g_mtracer->topAllocatorsString()<<endl;
2147 g_mtracer->clearAllocators();
2148 */
2149#endif
55a1378f 2150
70fb28d9 2151 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2152 try {
29e6303a 2153 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2154 bool xpfFound = false;
2155
2156 ecsFound = false;
2157
2158 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2159 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2160 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2161
c15ff3df
RG
2162 qnameParsed = true;
2163 ecsParsed = true;
12aff2e5 2164
70fb28d9 2165 if(t_pdl) {
12aff2e5 2166 try {
70fb28d9 2167 if (t_pdl->d_gettag_ffi) {
ea4aa152 2168 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName, ttlCap, variable, logQuery);
70fb28d9
RG
2169 }
2170 else if (t_pdl->d_gettag) {
ea4aa152 2171 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName);
70fb28d9 2172 }
12aff2e5 2173 }
70fb28d9 2174 catch(const std::exception& e) {
12aff2e5 2175 if(g_logCommonErrors)
e6a9dde5 2176 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2177 }
8ea8c302 2178 }
b2eacd67 2179 }
70fb28d9 2180 catch(const std::exception& e)
b2eacd67 2181 {
2182 if(g_logCommonErrors)
e6a9dde5 2183 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 2184 }
12ce523e 2185 }
3ddb9247 2186
02b47f43 2187 bool cacheHit = false;
1fbc6dc5 2188 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 2189#ifdef HAVE_PROTOBUF
b773359c 2190 if (t_protobufServers) {
d362f7c1 2191 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
c165308b 2192 pbMessage->setServerIdentity(SyncRes::s_serverID);
845cbf4c 2193 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
ea4aa152 2194 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName);
b790ef3d 2195 }
d9d3f9c1
RG
2196 }
2197#endif /* HAVE_PROTOBUF */
02b47f43 2198
70fb28d9
RG
2199 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2200 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2201 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2202 vState valState;
c15ff3df 2203 if (qnameParsed) {
08b02366 2204 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2205 }
2206 else {
08b02366 2207 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &ecsBegin, &ecsEnd, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
2208 }
2209
d9d3f9c1 2210 if (cacheHit) {
8467ec26
KM
2211 if(valState == Bogus) {
2212 if(t_bogusremotes)
2213 t_bogusremotes->push_back(source);
2214 if(t_bogusqueryring)
2215 t_bogusqueryring->push_back(make_pair(qname, qtype));
2216 }
2217
d9d3f9c1 2218#ifdef HAVE_PROTOBUF
b773359c 2219 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbMessage->getAppliedPolicy().empty() && pbMessage->getPolicyTags().empty())) {
5cc8371b 2220 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 2221 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
2222 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
2223 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
c29d820c
RG
2224 if (g_useKernelTimestamp && tv.tv_sec) {
2225 pbMessage->setQueryTime(tv.tv_sec, tv.tv_usec);
2226 }
2227 else {
2228 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
2229 }
d362f7c1
RG
2230 pbMessage->setRequestorId(requestorId);
2231 pbMessage->setDeviceId(deviceId);
ea4aa152 2232 pbMessage->setDeviceName(deviceName);
b773359c 2233 protobufLogResponse(*pbMessage);
02b47f43 2234 }
d9d3f9c1 2235#endif /* HAVE_PROTOBUF */
49a3500d 2236 if(!g_quiet)
e6a9dde5 2237 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2238
1bc3c142
BH
2239 g_stats.packetCacheHits++;
2240 SyncRes::s_queries++;
2241 ageDNSPacket(response, age);
b71b60ee 2242 struct msghdr msgh;
2243 struct iovec iov;
2244 char cbuf[256];
2245 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2246 msgh.msg_control=NULL;
2247
cbc03320 2248 if(g_fromtosockets.count(fd)) {
fbe2a2e0 2249 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 2250 }
cbc03320 2251 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 2252 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 2253
97bee66d 2254 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2255 struct dnsheader tmpdh;
2256 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2257 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2258 }
08f3f638 2259 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2260 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2261 return 0;
2262 }
3ddb9247 2263 }
1bc3c142 2264 catch(std::exception& e) {
1b654ccb
RG
2265 if(g_logCommonErrors)
2266 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2267 return 0;
2268 }
3ddb9247 2269
f26bf547 2270 if(t_pdl) {
5cc8371b 2271 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2272 if(!g_quiet)
e6a9dde5 2273 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2274 g_stats.policyDrops++;
2275 return 0;
2276 }
2277 }
2278
1bc3c142 2279 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2280 if(!g_quiet)
e6a9dde5 2281 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2282
1bc3c142
BH
2283 g_stats.overCapacityDrops++;
2284 return 0;
2285 }
3ddb9247 2286
9a864da4 2287 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data)));
1bc3c142 2288 dc->setSocket(fd);
49a3500d 2289 dc->d_tag=ctag;
e9f63d47 2290 dc->d_qhash=qhash;
5cc8371b
RG
2291 dc->setRemote(fromaddr);
2292 dc->setSource(source);
b71b60ee 2293 dc->setLocal(destaddr);
5cc8371b 2294 dc->setDestination(destination);
1bc3c142 2295 dc->d_tcp=false;
b40562da
RG
2296 dc->d_ecsFound = ecsFound;
2297 dc->d_ecsParsed = ecsParsed;
08b02366
RG
2298 dc->d_ecsBegin = ecsBegin;
2299 dc->d_ecsEnd = ecsEnd;
b40562da 2300 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2301 dc->d_ttlCap = ttlCap;
2302 dc->d_variable = variable;
aa7929a3 2303#ifdef HAVE_PROTOBUF
b773359c 2304 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2305 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2306 }
67e31ebe 2307 dc->d_requestorId = requestorId;
590388d2 2308 dc->d_deviceId = deviceId;
ea4aa152 2309 dc->d_deviceName = deviceName;
c29d820c 2310 dc->d_kernelTimestamp = tv;
aa7929a3
RG
2311#endif
2312
9a864da4 2313 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2314 return 0;
3ddb9247
PD
2315}
2316
b71b60ee 2317
d187038c 2318static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2319{
a683e8bd 2320 ssize_t len;
12c2f2b9 2321 static const size_t maxIncomingQuerySize = 512;
04896b99 2322 static thread_local std::string data;
5db529f8 2323 ComboAddress fromaddr;
b71b60ee 2324 struct msghdr msgh;
2325 struct iovec iov;
2326 char cbuf[256];
390f1dab 2327 bool firstQuery = true;
b71b60ee 2328
c0a00acd
RG
2329 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
2330 data.resize(maxIncomingQuerySize);
2331 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
2332 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2333
c0a00acd 2334 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2335
c0a00acd 2336 firstQuery = false;
390f1dab 2337
c0a00acd
RG
2338 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2339 g_stats.ignoredCount++;
2340 if (!g_quiet) {
2341 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2342 }
2343 return;
04896b99 2344 }
04896b99 2345
c0a00acd
RG
2346 if (msgh.msg_flags & MSG_TRUNC) {
2347 g_stats.truncatedDrops++;
2348 if (!g_quiet) {
2349 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2350 }
2351 return;
ba892c7f 2352 }
b23b8614 2353
c0a00acd
RG
2354 if(t_remotes) {
2355 t_remotes->push_back(fromaddr);
2356 }
81859ba5 2357
c0a00acd
RG
2358 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2359 if(!g_quiet) {
2360 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2361 }
3ddb9247 2362
c0a00acd
RG
2363 g_stats.unauthorizedUDP++;
2364 return;
5db529f8 2365 }
c0a00acd
RG
2366 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2367 if(!fromaddr.sin4.sin_port) { // also works for IPv6
2368 if(!g_quiet) {
2369 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
2370 }
2371
2372 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2373 return;
3abcdab2 2374 }
c0a00acd
RG
2375
2376 try {
2377 data.resize(static_cast<size_t>(len));
2378 dnsheader* dh=(dnsheader*)&data[0];
2379
2380 if(dh->qr) {
2381 g_stats.ignoredCount++;
2382 if(g_logCommonErrors) {
2383 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
2384 }
2385 }
2386 else if(dh->opcode) {
2387 g_stats.ignoredCount++;
2388 if(g_logCommonErrors) {
2389 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
2390 }
a6147cd2 2391 }
c0f9be19
RG
2392 else if (dh->qdcount == 0) {
2393 g_stats.emptyQueriesCount++;
2394 if(g_logCommonErrors) {
2395 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
2396 }
2397 }
a6147cd2 2398 else {
c0a00acd
RG
2399 struct timeval tv={0,0};
2400 HarvestTimestamp(&msgh, &tv);
2401 ComboAddress dest;
2402 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2403 auto loc = rplookup(g_listenSocketsAddresses, fd);
2404 if(HarvestDestinationAddress(&msgh, &dest)) {
2405 // but.. need to get port too
2406 if(loc) {
2407 dest.sin4.sin_port = loc->sin4.sin_port;
2408 }
a6147cd2 2409 }
2410 else {
c0a00acd
RG
2411 if(loc) {
2412 dest = *loc;
2413 }
2414 else {
2415 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2416 socklen_t slen = dest.getSocklen();
2417 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2418 }
2419 }
2420
2421 if(g_weDistributeQueries) {
2422 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
2423 }
2424 else {
144040be 2425 ++s_threadInfos[t_id].numberOfDistributedQueries;
c0a00acd 2426 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
a6147cd2 2427 }
2428 }
c0a00acd 2429 }
16ce7f18 2430 catch(const MOADNSException &mde) {
c0a00acd
RG
2431 g_stats.clientParseError++;
2432 if(g_logCommonErrors) {
2433 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
2434 }
2435 }
2436 catch(const std::runtime_error& e) {
2437 g_stats.clientParseError++;
2438 if(g_logCommonErrors) {
2439 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
2440 }
5db529f8
BH
2441 }
2442 }
c0a00acd
RG
2443 else {
2444 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2445 if(firstQuery && errno == EAGAIN) {
2446 g_stats.noPacketError++;
2447 }
390f1dab 2448
c0a00acd
RG
2449 break;
2450 }
ac0e821b 2451 }
5db529f8
BH
2452}
2453
adb6cd72 2454static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 2455{
37d3f960 2456 int fd;
f28307ad 2457 vector<string>locals;
2e3d8a19 2458 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2459
f28307ad 2460 if(locals.empty())
3f81d239 2461 throw PDNSException("No local address specified");
3ddb9247 2462
f28307ad 2463 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2464 ServiceTuple st;
2465 st.port=::arg().asNum("local-port");
2466 parseService(*i, st);
3ddb9247 2467
32252594
BH
2468 ComboAddress sin;
2469
d38e2ba9 2470 sin.reset();
37d3f960 2471 sin.sin4.sin_family = AF_INET;
32252594 2472 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2473 sin.sin6.sin6_family = AF_INET6;
f71bc087 2474 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2475 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2476 }
2477
2478 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2479 if(fd<0)
3f81d239 2480 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2481
3897b9e1 2482 setCloseOnExec(fd);
a903b39c 2483
f28307ad 2484 int tmp=1;
810ff705 2485 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2486 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2487 exit(1);
f28307ad 2488 }
0dfa94ab 2489 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2490 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2491 }
2492
c8ddb7c2 2493#ifdef TCP_DEFER_ACCEPT
38ac0821 2494 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2495 if(i==locals.begin())
377602e3 2496 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2497 }
2498#endif
2499
fec7dd5a
SS
2500 if( ::arg().mustDo("non-local-bind") )
2501 Utility::setBindAny(AF_INET, fd);
2502
2332f42d 2503#ifdef SO_REUSEPORT
810ff705
RG
2504 if(g_reusePort) {
2505 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2506 throw PDNSException("SO_REUSEPORT: "+stringerror());
2507 }
2508#endif
2509
0735b17e
RG
2510 if (::arg().asNum("tcp-fast-open") > 0) {
2511#ifdef TCP_FASTOPEN
2512 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2513 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2514 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2515 }
2516#else
e6a9dde5 2517 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2518#endif
2519 }
2520
32252594 2521 sin.sin4.sin_port = htons(st.port);
a683e8bd 2522 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2523 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2524 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2525
3897b9e1 2526 setNonBlocking(fd);
49a699c4 2527 setSocketSendBuffer(fd, 65000);
37d3f960 2528 listen(fd, 128);
b243ca3b 2529 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
2530 tcpSockets.insert(fd);
2531
84433b79 2532 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2533 // - fd is not that which we know here, but returned from accept()
3ddb9247 2534 if(sin.sin4.sin_family == AF_INET)
377602e3 2535 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2536 else
377602e3 2537 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2538 }
9c495589
BH
2539}
2540
b243ca3b 2541static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 2542{
fec7dd5a 2543 int one=1;
f28307ad 2544 vector<string>locals;
2e3d8a19 2545 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2546
f28307ad 2547 if(locals.empty())
3f81d239 2548 throw PDNSException("No local address specified");
3ddb9247 2549
f28307ad 2550 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2551 ServiceTuple st;
2552 st.port=::arg().asNum("local-port");
2553 parseService(*i, st);
2554
37d3f960 2555 ComboAddress sin;
996c89cc 2556
d38e2ba9 2557 sin.reset();
37d3f960 2558 sin.sin4.sin_family = AF_INET;
32252594 2559 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2560 sin.sin6.sin6_family = AF_INET6;
f71bc087 2561 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2562 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2563 }
3ddb9247 2564
bb4bdbaf 2565 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2566 if(fd < 0) {
3f81d239 2567 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2568 }
915b0c39 2569 if (!setSocketTimestamps(fd))
e6a9dde5 2570 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2571
b71b60ee 2572 if(IsAnyAddress(sin)) {
cbc03320 2573 if(sin.sin4.sin_family == AF_INET)
2574 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2575 g_fromtosockets.insert(fd);
757d3179 2576#ifdef IPV6_RECVPKTINFO
cbc03320 2577 if(sin.sin4.sin_family == AF_INET6)
2578 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2579 g_fromtosockets.insert(fd);
757d3179 2580#endif
0dfa94ab 2581 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2582 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2583 }
b71b60ee 2584 }
fec7dd5a
SS
2585 if( ::arg().mustDo("non-local-bind") )
2586 Utility::setBindAny(AF_INET6, fd);
2587
3897b9e1 2588 setCloseOnExec(fd);
a903b39c 2589
4e9a20e6 2590 setSocketReceiveBuffer(fd, 250000);
32252594 2591 sin.sin4.sin_port = htons(st.port);
37d3f960 2592
2332f42d 2593
2573d4a6 2594#ifdef SO_REUSEPORT
810ff705 2595 if(g_reusePort) {
2332f42d 2596 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2597 throw PDNSException("SO_REUSEPORT: "+stringerror());
2598 }
2599#endif
90f9fbc0
RG
2600
2601 if (sin.isIPv4()) {
2602 try {
2603 setSocketIgnorePMTU(fd);
2604 }
2605 catch(const std::exception& e) {
2606 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
2607 }
2608 }
2609
2610 socklen_t socklen=sin.getSocklen();
3ddb9247 2611 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2612 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2613
3897b9e1 2614 setNonBlocking(fd);
c2136bf0 2615
b243ca3b 2616 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2617 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2618 if(sin.sin4.sin_family == AF_INET)
377602e3 2619 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2620 else
377602e3 2621 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2622 }
c836dc19 2623}
caa6eefa 2624
d187038c 2625static void daemonize(void)
c836dc19
BH
2626{
2627 if(fork())
2628 exit(0); // bye bye
3ddb9247
PD
2629
2630 setsid();
c836dc19 2631
27a5ead5 2632 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2633 if(i < 0)
e6a9dde5 2634 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2635 else {
2636 dup2(i,0); /* stdin */
2637 dup2(i,1); /* stderr */
2638 dup2(i,2); /* stderr */
2639 close(i);
2640 }
288f4aa9 2641}
caa6eefa 2642
d187038c 2643static void usr1Handler(int)
c75a6a9e
BH
2644{
2645 statsWanted=true;
2646}
ae1b2e98 2647
d187038c 2648static void usr2Handler(int)
9170fbaf 2649{
f1f34cc2 2650 g_quiet= !g_quiet;
2651 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2652 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2653}
2654
d187038c 2655static void doStats(void)
c75a6a9e 2656{
16beeaa4
BH
2657 static time_t lastOutputTime;
2658 static uint64_t lastQueryCount;
d299d4f5 2659
2660 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2661 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2662
d299d4f5 2663 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2664 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2665 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2666 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2667 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2668
e6a9dde5 2669 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2670 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
1efd0e28 2671 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<", failed ns: "
77689b10
OM
2672 << broadcastAccFunction<uint64_t>(pleaseGetFailedServersSize)<<", ednsmap: "
2673 <<broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize)<<endl;
e6a9dde5
PL
2674 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2675 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2676 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2677 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2678 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2679
e6a9dde5 2680 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2681 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2682
e6a9dde5 2683 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2684 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2685
144040be
RG
2686 size_t idx = 0;
2687 for (const auto& threadInfo : s_threadInfos) {
2688 if(threadInfo.isWorker) {
ad9fc3dc 2689 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
144040be
RG
2690 ++idx;
2691 }
2692 }
2693
16beeaa4
BH
2694 time_t now = time(0);
2695 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2696 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2697 }
2698 lastOutputTime = now;
2699 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2700 }
3ddb9247 2701 else if(statsWanted)
e6a9dde5 2702 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2703
c75a6a9e
BH
2704 statsWanted=false;
2705}
c836dc19 2706
29f0b1ce 2707static void houseKeeping(void *)
c836dc19 2708{
e4ae55e5 2709 static thread_local time_t last_rootupdate, last_prune, last_secpoll, last_trustAnchorUpdate{0};
3337c2f7
RG
2710 static thread_local int cleanCounter=0;
2711 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
2712 auto luaconfsLocal = g_luaconfs.getLocal();
2713
2714 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
2715 // Loading the Lua config file already "refreshed" the TAs
2716 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
2717 }
2718
cc59bce6 2719 try {
6b0d90ea 2720 if(s_running) {
cc59bce6 2721 return;
6b0d90ea 2722 }
cc59bce6 2723 s_running=true;
3ddb9247 2724
cc59bce6 2725 struct timeval now;
2726 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2727
2728 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
a6f7f5fe 2729 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2730 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2731
a6f7f5fe 2732 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2733
cc59bce6 2734 if(!((cleanCounter++)%40)) { // this is a full scan!
2735 time_t limit=now.tv_sec-300;
a712cb56 2736 SyncRes::pruneNSSpeeds(limit);
1efd0e28
OM
2737 limit = now.tv_sec - SyncRes::s_serverdownthrottletime * 10;
2738 SyncRes::pruneFailedServers(limit);
77689b10
OM
2739 limit = now.tv_sec - 2*3600;
2740 SyncRes::pruneEDNSStatuses(limit);
cc59bce6 2741 }
2742 last_prune=time(0);
d67620e4 2743 }
3ddb9247 2744
cc59bce6 2745 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2746 int res = SyncRes::getRootNS(g_now, nullptr);
e7fb25fa 2747 if (!res) {
7836f7b4 2748 last_rootupdate=now.tv_sec;
e7fb25fa
OM
2749 primeRootNSZones(g_dnssecmode != DNSSECMode::Off);
2750 }
cc59bce6 2751 }
3ddb9247 2752
b243ca3b 2753 if(isHandlerThread()) {
3ddb9247 2754
cc59bce6 2755 if(now.tv_sec - last_secpoll >= 3600) {
2756 try {
2757 doSecPoll(&last_secpoll);
2758 }
581d4ea3 2759 catch(std::exception& e)
2760 {
e6a9dde5 2761 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2762 }
47e9b74f 2763 catch(PDNSException& e)
2764 {
e6a9dde5 2765 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2766 }
d0992a65
CH
2767 catch(ImmediateServFailException &e)
2768 {
e6a9dde5 2769 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2770 }
47e9b74f 2771 catch(...)
2772 {
e6a9dde5 2773 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2774 }
18b73338 2775 }
e4ae55e5
PL
2776
2777 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
2778 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
2779 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
2780 try {
2781 map<DNSName, dsmap_t> dsAnchors;
2782 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
2783 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
2784 lci.dsAnchors = dsAnchors;
2785 });
2786 }
2787 last_trustAnchorUpdate = now.tv_sec;
2788 } catch (const PDNSException &pe) {
2789 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
2790 }
2791 }
d67620e4 2792 }
6b0d90ea 2793 s_running=false;
d67620e4 2794 }
cc59bce6 2795 catch(PDNSException& ae)
2796 {
2797 s_running=false;
e6a9dde5 2798 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2799 throw;
2800 }
779828c4 2801}
d6d5dea7 2802
d187038c 2803static void makeThreadPipes()
49a699c4 2804{
ee271fc4
RG
2805 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
2806 if (pipeBufferSize > 0) {
2807 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
2808 }
2809
b243ca3b
RG
2810 /* thread 0 is the handler / SNMP, we start at 1 */
2811 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
2812 auto& threadInfos = s_threadInfos.at(n);
2813
49a699c4
BH
2814 int fd[2];
2815 if(pipe(fd) < 0)
2816 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2817
b243ca3b
RG
2818 threadInfos.pipes.readToThread = fd[0];
2819 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 2820
49a699c4
BH
2821 if(pipe(fd) < 0)
2822 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
2823
2824 threadInfos.pipes.readFromThread = fd[0];
2825 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 2826
cf8cda18
RG
2827 if(pipe(fd) < 0)
2828 unixDie("Creating pipe for inter-thread communications");
d10307c5 2829
b243ca3b
RG
2830 threadInfos.pipes.readQueriesToThread = fd[0];
2831 threadInfos.pipes.writeQueriesToThread = fd[1];
2832
ee271fc4
RG
2833 if (pipeBufferSize > 0) {
2834 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
2835 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(errno)<<endl;
2836 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
2837 if (existingSize > 0) {
2838 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
2839 }
2840 }
2841 }
2842
b243ca3b 2843 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
2844 unixDie("Making pipe for inter-thread communications non-blocking");
2845 }
49a699c4
BH
2846 }
2847}
2848
00c9b8c1
BH
2849struct ThreadMSG
2850{
2851 pipefunc_t func;
2852 bool wantAnswer;
2853};
2854
b4e76a18 2855void broadcastFunction(const pipefunc_t& func)
49a699c4 2856{
b243ca3b
RG
2857 /* This function might be called by the worker with t_id 0 during startup
2858 for the initialization of ACLs and domain maps. After that it should only
2859 be called by the handler. */
d77abca1 2860
b243ca3b
RG
2861 if (s_threadInfos.empty() && isHandlerThread()) {
2862 /* the handler and distributors will call themselves below, but
2863 during startup we get called while s_threadInfos has not been
2864 populated yet to update the ACL or domain maps, so we need to
2865 handle that case.
2866 */
2867 func();
2868 }
b4e76a18 2869
b243ca3b
RG
2870 unsigned int n = 0;
2871 for (const auto& threadInfo : s_threadInfos) {
49a699c4 2872 if(n++ == t_id) {
b4e76a18 2873 func(); // don't write to ourselves!
49a699c4
BH
2874 continue;
2875 }
3ddb9247 2876
00c9b8c1
BH
2877 ThreadMSG* tmsg = new ThreadMSG();
2878 tmsg->func = func;
2879 tmsg->wantAnswer = true;
b243ca3b 2880 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 2881 delete tmsg;
b243ca3b 2882
49a699c4 2883 unixDie("write to thread pipe returned wrong size or error");
b841314c 2884 }
3ddb9247 2885
49467864 2886 string* resp = nullptr;
b243ca3b 2887 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 2888 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2889
49a699c4 2890 if(resp) {
49a699c4 2891 delete resp;
49467864 2892 resp = nullptr;
49a699c4
BH
2893 }
2894 }
2895}
06ea9015 2896
592d7ade 2897static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 2898{
144040be 2899 auto& targetInfo = s_threadInfos[target];
b243ca3b
RG
2900 if(!targetInfo.isWorker) {
2901 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 2902 exit(1);
00c9b8c1 2903 }
d77abca1 2904
b243ca3b 2905 const auto& tps = targetInfo.pipes;
3ddb9247 2906
cf8cda18
RG
2907 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2908 if (written > 0) {
2909 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2910 delete tmsg;
2911 unixDie("write to thread pipe returned wrong size or error");
2912 }
2913 }
2914 else {
2915 int error = errno;
cf8cda18 2916 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 2917 return false;
cf8cda18 2918 } else {
592d7ade 2919 delete tmsg;
17634427 2920 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2921 }
b841314c 2922 }
592d7ade 2923
144040be
RG
2924 ++targetInfo.numberOfDistributedQueries;
2925
592d7ade
RG
2926 return true;
2927}
2928
144040be
RG
2929static unsigned int getWorkerLoad(size_t workerIdx)
2930{
2931 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
2932 if (mt != nullptr) {
2933 return mt->numProcesses();
2934 }
2935 return 0;
2936}
2937
2938static unsigned int selectWorker(unsigned int hash)
2939{
2940 if (s_balancingFactor == 0) {
2941 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
2942 }
2943
2944 /* we start with one, representing the query we are currently handling */
2945 double currentLoad = 1;
2946 std::vector<unsigned int> load(g_numWorkerThreads);
2947 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
2948 load[idx] = getWorkerLoad(idx);
2949 currentLoad += load[idx];
2950 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
2951 }
2952
2953 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
2954 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
2955
2956 unsigned int worker = hash % g_numWorkerThreads;
1b9d2d46 2957 /* at least one server has to be at or below the average load */
596bf482
RG
2958 if (load[worker] > targetLoad) {
2959 ++g_stats.rebalancedQueries;
2960 do {
2961 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
2962 worker = (worker + 1) % g_numWorkerThreads;
2963 }
2964 while(load[worker] > targetLoad);
144040be
RG
2965 }
2966
2967 return /* skip handler */ 1 + g_numDistributorThreads + worker;
2968}
2969
592d7ade
RG
2970// This function is only called by the distributor threads, when pdns-distributes-queries is set
2971void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2972{
2973 if (!isDistributorThread()) {
2974 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2975 exit(1);
2976 }
2977
2978 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
144040be 2979 unsigned int target = selectWorker(hash);
592d7ade
RG
2980
2981 ThreadMSG* tmsg = new ThreadMSG();
2982 tmsg->func = func;
2983 tmsg->wantAnswer = false;
2984
2985 if (!trySendingQueryToWorker(target, tmsg)) {
2986 /* if this function failed but did not raise an exception, it means that the pipe
2987 was full, let's try another one */
2988 unsigned int newTarget = 0;
2989 do {
2990 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
2991 } while (newTarget == target);
2992
2993 if (!trySendingQueryToWorker(newTarget, tmsg)) {
2994 g_stats.queryPipeFullDrops++;
2995 delete tmsg;
2996 }
2997 }
00c9b8c1 2998}
3427fa8a 2999
d187038c 3000static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 3001{
f26bf547 3002 ThreadMSG* tmsg = nullptr;
3ddb9247 3003
cf8cda18 3004 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
3005 unixDie("read from thread pipe returned wrong size or error");
3006 }
3ddb9247 3007
2f22827a 3008 void *resp=0;
3009 try {
3010 resp = tmsg->func();
3011 }
3012 catch(std::exception& e) {
6d2010a8 3013 if(g_logCommonErrors)
e6a9dde5 3014 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3015 }
3016 catch(PDNSException& e) {
6d2010a8 3017 if(g_logCommonErrors)
e6a9dde5 3018 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3019 }
d7c676a5 3020 if(tmsg->wantAnswer) {
b243ca3b
RG
3021 const auto& threadInfo = s_threadInfos.at(t_id);
3022 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 3023 delete tmsg;
00c9b8c1 3024 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
3025 }
3026 }
3ddb9247 3027
00c9b8c1 3028 delete tmsg;
49a699c4 3029}
09e6702a 3030
13034931
BH
3031template<class T> void *voider(const boost::function<T*()>& func)
3032{
3033 return func();
3034}
3035
b3b5459d
BH
3036vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
3037{
3038 a.insert(a.end(), b.begin(), b.end());
3039 return a;
3040}
3041
92011b8f 3042vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
3043{
3044 a.insert(a.end(), b.begin(), b.end());
3045 return a;
3046}
3047
3ddb9247
PD
3048vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3049{
3050 a.insert(a.end(), b.begin(), b.end());
3051 return a;
3052}
3053
92011b8f 3054
387b9ca6
RG
3055/*
3056 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
3057 reload the Lua script (not the Lua config) or change the current trace regex,
3058 and by the SNMP thread to gather metrics. */
b4e76a18 3059template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 3060{
b243ca3b 3061 if (!isHandlerThread()) {
788eeb4c 3062 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 3063 exit(1);
d77abca1
RG
3064 }
3065
b243ca3b 3066 unsigned int n = 0;
3427fa8a 3067 T ret=T();
b243ca3b
RG
3068 for (const auto& threadInfo : s_threadInfos) {
3069 if (n++ == t_id) {
3070 continue;
3071 }
3072
3073 const auto& tps = threadInfo.pipes;
00c9b8c1
BH
3074 ThreadMSG* tmsg = new ThreadMSG();
3075 tmsg->func = boost::bind(voider<T>, func);
3076 tmsg->wantAnswer = true;
3ddb9247 3077
b841314c
RG
3078 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3079 delete tmsg;
3427fa8a 3080 unixDie("write to thread pipe returned wrong size or error");
b841314c 3081 }
3ddb9247 3082
49467864 3083 T* resp = nullptr;
3427fa8a
BH
3084 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3085 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3086
3427fa8a 3087 if(resp) {
3427fa8a
BH
3088 ret += *resp;
3089 delete resp;
49467864 3090 resp = nullptr;
3427fa8a
BH
3091 }
3092 }
3093 return ret;
3094}
3095
b4e76a18
RG
3096template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3097template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3098template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3099template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
5ac6d761 3100template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3427fa8a 3101
d187038c 3102static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3103{
fbfc1809
RG
3104 try {
3105 string remote;
3106 string msg=s_rcc.recv(&remote);
3107 RecursorControlParser rcp;
3108 RecursorControlParser::func_t* command;
3ddb9247 3109
fbfc1809 3110 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0 3111
fbfc1809
RG
3112 // If we are inside a chroot, we need to strip
3113 if (!arg()["chroot"].empty()) {
3114 size_t len = arg()["chroot"].length();
3115 remote = remote.substr(len);
3116 }
f0f3f0b0 3117
ab5c053d
BH
3118 s_rcc.send(answer, &remote);
3119 command();
3120 }
fbfc1809 3121 catch(const std::exception& e) {
e6a9dde5 3122 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3123 }
fbfc1809 3124 catch(const PDNSException& ae) {
e6a9dde5 3125 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3126 }
09e6702a
BH
3127}
3128
d187038c 3129static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3130{
0b18b22e 3131 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 3132 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3133
667f7e60 3134 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3135
a683e8bd 3136 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3137 if(ret > 0) {
667f7e60 3138 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3139 pident->inNeeded-=(size_t)ret;
825fa717 3140 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3141 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3142 PacketID pid=*pident;
3143 string msg=pident->inMSG;
3ddb9247 3144
bb4bdbaf 3145 t_fdm->removeReadFD(fd);
3ddb9247 3146 MT->sendEvent(pid, &msg);
09e6702a
BH
3147 }
3148 else {
667f7e60 3149 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3150 }
3151 }
3152 else {
667f7e60 3153 PacketID tmp=*pident;
bb4bdbaf 3154 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3155 string empty;
3156 MT->sendEvent(tmp, &empty); // this conveys error status
3157 }
3158}
3159
d187038c 3160static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3161{
0b18b22e 3162 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 3163 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3164 if(ret > 0) {
a683e8bd 3165 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3166 if(pid->outPos==pid->outMSG.size()) {
3167 PacketID tmp=*pid;
bb4bdbaf 3168 t_fdm->removeWriteFD(fd);
09e6702a
BH
3169 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3170 }
3171 }
3172 else { // error or EOF
667f7e60 3173 PacketID tmp(*pid);
bb4bdbaf 3174 t_fdm->removeWriteFD(fd);
09e6702a 3175 string sent;
998a4334 3176 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3177 }
3178}
3179
34801ab1 3180// resend event to everybody chained onto it
d187038c 3181static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
3182{
3183 if(iter->key.chain.empty())
3184 return;
e27e91a8 3185 // cerr<<"doResends called!\n";
34801ab1
BH
3186 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3187 resend.fd=-1;
3188 resend.id=*i;
e27e91a8 3189 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3190
34801ab1
BH
3191 MT->sendEvent(resend, &content);
3192 g_stats.chainResends++;
34801ab1
BH
3193 }
3194}
3195
d187038c 3196static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3197{
600fc20b 3198 PacketID pid=any_cast<PacketID>(var);
a683e8bd 3199 ssize_t len;
fae8fe07
RG
3200 std::string packet;
3201 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3202 ComboAddress fromaddr;
09e6702a
BH
3203 socklen_t addrlen=sizeof(fromaddr);
3204
fae8fe07 3205 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3206
a683e8bd 3207 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3208 if(len < 0)
996c89cc 3209 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3210 else {
3ddb9247 3211 g_stats.serverParseError++;
09e6702a 3212 if(g_logCommonErrors)
e6a9dde5 3213 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3214 ": packet smaller than DNS header"<<endl;
998a4334 3215 }
34801ab1 3216
49a699c4 3217 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3218 string empty;
3219
3220 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3221 if(iter != MT->d_waiters.end())
34801ab1 3222 doResends(iter, pid, empty);
3ddb9247 3223
34801ab1 3224 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3225 return;
3ddb9247 3226 }
998a4334 3227
fae8fe07 3228 packet.resize(len);
998a4334 3229 dnsheader dh;
fae8fe07 3230 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3231
6da3b3ad
PD
3232 PacketID pident;
3233 pident.remote=fromaddr;
3234 pident.id=dh.id;
3235 pident.fd=fd;
34801ab1 3236
33a928af 3237 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3238 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3239 }
3240
3241 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3242 !dh.qr) { // one weird server
3243 pident.domain.clear();
3244 pident.type = 0;
3245 }
3246 else {
3247 try {
0b31e67e 3248 if(len > 12)
fae8fe07 3249 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
3250 }
3251 catch(std::exception& e) {
3252 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 3253 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 3254 return;
34801ab1 3255 }
6da3b3ad 3256 }
34801ab1 3257
6da3b3ad
PD
3258 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
3259 if(iter != MT->d_waiters.end()) {
3260 doResends(iter, pident, packet);
3261 }
c1da7976 3262
6da3b3ad 3263retryWithName:
4957a608 3264
6da3b3ad
PD
3265 if(!MT->sendEvent(pident, &packet)) {
3266 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3267 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
3268 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 3269 pident.domain == mthread->key.domain) {
6da3b3ad 3270 mthread->key.nearMisses++;
998a4334 3271 }
6da3b3ad
PD
3272
3273 // be a bit paranoid here since we're weakening our matching
3ddb9247 3274 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
3275 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
3276 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3277 pident.domain = mthread->key.domain;
3278 pident.type = mthread->key.type;
3279 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 3280 }
09e6702a 3281 }
6da3b3ad
PD
3282 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
3283 if(g_logCommonErrors) {
e6a9dde5 3284 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 3285 }
09e6702a 3286 }
6da3b3ad
PD
3287 else if(fd >= 0) {
3288 t_udpclientsocks->returnSocket(fd);
3289 }
09e6702a
BH
3290}
3291
1f4abb20
BH
3292FDMultiplexer* getMultiplexer()
3293{
3294 FDMultiplexer* ret;
f26bf547 3295 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 3296 try {
f26bf547 3297 ret=i.second();
1f4abb20
BH
3298 return ret;
3299 }
98d0ee4a 3300 catch(FDMultiplexerException &fe) {
e6a9dde5 3301 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
3302 }
3303 catch(...) {
e6a9dde5 3304 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 3305 }
1f4abb20 3306 }
e6a9dde5 3307 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
3308 exit(1);
3309}
3310
3ddb9247 3311
d187038c 3312static string* doReloadLuaScript()
4485aa35 3313{
674cf0f6 3314 string fname= ::arg()["lua-dns-script"];
4485aa35 3315 try {
674cf0f6 3316 if(fname.empty()) {
f26bf547 3317 t_pdl.reset();
377602e3 3318 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 3319 return new string("unloaded\n");
4485aa35
BH
3320 }
3321 else {
9694e14f
AT
3322 t_pdl = std::make_shared<RecursorLua4>();
3323 t_pdl->loadFile(fname);
4485aa35
BH
3324 }
3325 }
fdbf35ac 3326 catch(std::exception& e) {
e6a9dde5 3327 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 3328 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 3329 }
3ddb9247 3330
e6a9dde5 3331 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 3332 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
3333}
3334
49a699c4
BH
3335string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3336{
3ddb9247 3337 if(begin != end)
49a699c4 3338 ::arg().set("lua-dns-script") = *begin;
3ddb9247 3339
0f39c1a3 3340 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 3341}
49a699c4 3342
d187038c 3343static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
3344try
3345{
3346 if(newRegex.empty()) {
f26bf547 3347 t_traceRegex.reset();
77499b05
BH
3348 return new string("unset\n");
3349 }
3350 else {
f26bf547 3351 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
3352 return new string("ok\n");
3353 }
3354}
3f81d239 3355catch(PDNSException& ae)
77499b05
BH
3356{
3357 return new string(ae.reason+"\n");
3358}
3359
3360string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
3361{
3362 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
3363}
3364
4e9a20e6 3365static void checkLinuxIPv6Limits()
3366{
3367#ifdef __linux__
3368 string line;
3369 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 3370 int lim=std::stoi(line);
4e9a20e6 3371 if(lim < 16384) {
e6a9dde5 3372 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 3373 }
3374 }
3375#endif
3376}
36849ff2 3377static void checkOrFixFDS()
4e9a20e6 3378{
c0063e60 3379 unsigned int availFDs=getFilenumLimit();
3380 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
3381
3382 if(wantFDs > availFDs) {
067ad20e 3383 unsigned int hardlimit= getFilenumLimit(true);
3384 if(hardlimit >= wantFDs) {
c0063e60 3385 setFilenumLimit(wantFDs);
e6a9dde5 3386 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 3387 }
3388 else {
067ad20e 3389 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 3390 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 3391 g_maxMThreads = newval;
067ad20e 3392 setFilenumLimit(hardlimit);
36849ff2 3393 }
3394 }
4e9a20e6 3395}
77499b05 3396
c390b2da 3397static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 3398
f26bf547 3399static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
3400{
3401 t_allowFrom = ng;
f26bf547 3402 return nullptr;
49a699c4
BH
3403}
3404
dbd23fc2
BH
3405int g_argc;
3406char** g_argv;
3407
18af64a8 3408void parseACLs()
f7c1d4e3 3409{
18af64a8 3410 static bool l_initialized;
3ddb9247 3411
49a699c4 3412 if(l_initialized) { // only reload configuration file on second call
18af64a8 3413 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
3414 if(::arg()["config-name"]!="") {
3415 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3416 }
18af64a8 3417 cleanSlashes(configname);
3ddb9247
PD
3418
3419 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 3420 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 3421 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 3422 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
3423 ::arg().preParse(g_argc, g_argv, "include-dir");
3424
3425 // then process includes
3426 std::vector<std::string> extraConfigs;
242b90e1
AT
3427 ::arg().gatherIncludes(extraConfigs);
3428
1dc8f4d0 3429 for(const std::string& fn : extraConfigs) {
7e818521 3430 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3431 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
3432 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
3433 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 3434 }
ca2c884c
AT
3435
3436 ::arg().preParse(g_argc, g_argv, "allow-from-file");
3437 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 3438 }
49a699c4 3439
f26bf547
RG
3440 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
3441 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 3442
2c95fc65
BH
3443 if(!::arg()["allow-from-file"].empty()) {
3444 string line;
2c95fc65
BH
3445 ifstream ifs(::arg()["allow-from-file"].c_str());
3446 if(!ifs) {
9c61b9d0 3447 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
3448 }
3449
3450 string::size_type pos;
3451 while(getline(ifs,line)) {
3452 pos=line.find('#');
3453 if(pos!=string::npos)
3454 line.resize(pos);
3455 trim(line);
3456 if(line.empty())
3457 continue;
3458
18af64a8 3459 allowFrom->addMask(line);
2c95fc65 3460 }
e6a9dde5 3461 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
3462 }
3463 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
3464 vector<string> ips;
3465 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 3466
e6a9dde5 3467 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 3468 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 3469 allowFrom->addMask(*i);
f7c1d4e3 3470 if(i!=ips.begin())
e6a9dde5
PL
3471 g_log<<Logger::Warning<<", ";
3472 g_log<<Logger::Warning<<*i;
f7c1d4e3 3473 }
e6a9dde5 3474 g_log<<Logger::Warning<<endl;
f7c1d4e3 3475 }
49a699c4 3476 else {
3ddb9247 3477 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 3478 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 3479 allowFrom = nullptr;
49a699c4 3480 }
3ddb9247 3481
49a699c4 3482 g_initialAllowFrom = allowFrom;
d7dae798 3483 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3484 oldAllowFrom = nullptr;
3ddb9247 3485
49a699c4 3486 l_initialized = true;
18af64a8
BH
3487}
3488
795215f2 3489
756e82cf 3490static void setupDelegationOnly()
3491{
3492 vector<string> parts;
3493 stringtok(parts, ::arg()["delegation-only"], ", \t");
3494 for(const auto& p : parts) {
9065eb05 3495 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3496 }
3497}
795215f2 3498
8fd25133
RG
3499static std::map<unsigned int, std::set<int> > parseCPUMap()
3500{
3501 std::map<unsigned int, std::set<int> > result;
3502
3503 const std::string value = ::arg()["cpu-map"];
3504
3505 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3506 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3507 return result;
3508 }
3509
3510 std::vector<std::string> parts;
3511
3512 stringtok(parts, value, " \t");
3513
3514 for(const auto& part : parts) {
3515 if (part.find('=') == string::npos)
3516 continue;
3517
3518 try {
3519 auto headers = splitField(part, '=');
3520 trim(headers.first);
3521 trim(headers.second);
3522
3523 unsigned int threadId = pdns_stou(headers.first);
3524 std::vector<std::string> cpus;
3525
3526 stringtok(cpus, headers.second, ",");
3527
3528 for(const auto& cpu : cpus) {
3529 int cpuId = std::stoi(cpu);
3530
3531 result[threadId].insert(cpuId);
3532 }
3533 }
3534 catch(const std::exception& e) {
e6a9dde5 3535 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3536 }
3537 }
3538
3539 return result;
3540}
3541
3542static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3543{
3544 const auto& cpuMapping = cpusMap.find(n);
3545 if (cpuMapping != cpusMap.cend()) {
3546 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3547 if (rc == 0) {
e6a9dde5 3548 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3549 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3550 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3551 }
e6a9dde5 3552 g_log<<Logger::Info<<endl;
8fd25133
RG
3553 }
3554 else {
e6a9dde5 3555 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3556 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3557 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3558 }
e6a9dde5 3559 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3560 }
3561 }
3562}
3563
af1377b7
NC
3564#ifdef NOD_ENABLED
3565static void setupNODThread()
3566{
3567 if (g_nodEnabled) {
b78727c6
NC
3568 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
3569 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
3570 try {
3571 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
3572 }
3573 catch (const PDNSException& e) {
3574 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
3575 _exit(1);
3576 }
3577 if (!t_nodDBp->init()) {
3578 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
3579 _exit(1);
3580 }
41c542ec 3581 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 3582 t.detach();
ca2526f5 3583 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
3584 }
3585 if (g_udrEnabled) {
b78727c6
NC
3586 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
3587 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
3588 try {
3589 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
3590 }
3591 catch (const PDNSException& e) {
3592 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
3593 _exit(1);
3594 }
3595 if (!t_udrDBp->init()) {
3596 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
3597 _exit(1);
3598 }
3599 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 3600 t.detach();
ca2526f5 3601 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
3602 }
3603}
3604
3605void parseNODWhitelist(const std::string& wlist)
3606{
3607 vector<string> parts;
3608 stringtok(parts, wlist, ",; ");
3609 for(const auto& a : parts) {
3610 g_nodDomainWL.add(DNSName(a));
3611 }
3612}
3613
3614static void setupNODGlobal()
3615{
3616 // Setup NOD subsystem
3617 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
3618 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
3619 g_nodLog = ::arg().mustDo("new-domain-log");
3620 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
3621
3622 // Setup Unique DNS Response subsystem
3623 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
3624 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
3625}
3626#endif /* NOD_ENABLED */
3627
d187038c 3628static int serviceMain(int argc, char*argv[])
18af64a8 3629{
e6a9dde5
PL
3630 g_log.setName(s_programname);
3631 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3632 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3633
3634 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3635 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3636 if(val >= 0)
e6a9dde5 3637 g_log.setFacility(val);
18af64a8 3638 else
e6a9dde5 3639 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3640 }
3641
ba1a571d 3642 showProductVersion();
3afde9b2 3643
06ea9015 3644 g_disthashseed=dns_random(0xffffffff);
3645
b7ef5828
PL
3646 checkLinuxIPv6Limits();
3647 try {
3648 vector<string> addrs;
3649 if(!::arg()["query-local-address6"].empty()) {
3650 SyncRes::s_doIPv6=true;
e6a9dde5 3651 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3652
3653 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3654 for(const string& addr : addrs) {
3655 g_localQueryAddresses6.push_back(ComboAddress(addr));
3656 }
3657 }
3658 else {
e6a9dde5 3659 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3660 }
3661 addrs.clear();
3662 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3663 for(const string& addr : addrs) {
3664 g_localQueryAddresses4.push_back(ComboAddress(addr));
3665 }
3666 }
3667 catch(std::exception& e) {
e6a9dde5 3668 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3669 exit(99);
3670 }
3671
e48c6b8a
PL
3672 // keep this ABOVE loadRecursorLuaConfig!
3673 if(::arg()["dnssec"]=="off")
3674 g_dnssecmode=DNSSECMode::Off;
3675 else if(::arg()["dnssec"]=="process-no-validate")
3676 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3677 else if(::arg()["dnssec"]=="process")
3678 g_dnssecmode=DNSSECMode::Process;
3679 else if(::arg()["dnssec"]=="validate")
3680 g_dnssecmode=DNSSECMode::ValidateAll;
3681 else if(::arg()["dnssec"]=="log-fail")
3682 g_dnssecmode=DNSSECMode::ValidateForLog;
3683 else {
e6a9dde5 3684 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3685 exit(1);
3686 }
3687
9a3ab3e4
KM
3688 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
3689 if (g_signatureInceptionSkew < 0) {
3690 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
3691 exit(1);
3692 }
3693
e48c6b8a 3694 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3695 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3696
a6f7f5fe 3697 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3698 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
3699
3700 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 3701 try {
e6ec15bf 3702 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
3703 }
3704 catch (PDNSException &e) {
e6a9dde5 3705 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3706 exit(1);
3707 }
ad42489c 3708
18af64a8 3709 parseACLs();
d6f3fcfa 3710 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 3711
eb5bae86 3712 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3713 vector<string> ips;
3714 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3715 ips.push_back("0.0.0.0");
3716 ips.push_back("::");
c36bc97a 3717
e6a9dde5 3718 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3719 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3720 SyncRes::addDontQuery(*i);
eb5bae86 3721 if(i!=ips.begin())
e6a9dde5
PL
3722 g_log<<Logger::Warning<<", ";
3723 g_log<<Logger::Warning<<*i;
eb5bae86 3724 }
e6a9dde5 3725 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3726 }
3727
f7c1d4e3 3728 g_quiet=::arg().mustDo("quiet");
3ddb9247 3729
b243ca3b 3730 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
3731 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3732 if(g_weDistributeQueries) {
b243ca3b 3733 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3734 }
3ddb9247 3735
756e82cf 3736 setupDelegationOnly();
b33c2462 3737 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3738
77499b05
BH
3739 if(::arg()["trace"]=="fail") {
3740 SyncRes::setDefaultLogMode(SyncRes::Store);
3741 }
3742 else if(::arg().mustDo("trace")) {
3743 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3744 ::arg().set("quiet")="no";
3745 g_quiet=false;
3e9c6c0a 3746 g_dnssecLOG=true;
f7c1d4e3 3747 }
43a9b290
PL
3748 string myHostname = getHostname();
3749 if (myHostname == "UNKNOWN"){
3750 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
3751 myHostname = "";
d0983bff 3752 }
3ddb9247 3753
aadceba8 3754 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
5cf4b2e7 3755 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
aadceba8 3756
1051f8a9
BH
3757 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3758
f7c1d4e3 3759 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
b9473937 3760 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
63637fd8 3761 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3762 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3763 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3764 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3765 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3766 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3767 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3768 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3769 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3770 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3771 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3772 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 3773 if(SyncRes::s_serverID.empty()) {
d0983bff 3774 SyncRes::s_serverID = myHostname;
f7c1d4e3 3775 }
3ddb9247 3776
e9f9b8ec
RG
3777 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3778 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
c9783016 3779 SyncRes::clearECSStats();
fd8898fb 3780 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
3781 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
ed9019c9 3782 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
e9f9b8ec 3783
8a3a3822
RG
3784 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3785 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3786 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3787 }
3788 else {
3789 bool found = false;
3790 for (const auto& addr : g_localQueryAddresses4) {
3791 if (!IsAnyAddress(addr)) {
3792 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3793 found = true;
3794 break;
3795 }
3796 }
3797 if (!found) {
3798 for (const auto& addr : g_localQueryAddresses6) {
3799 if (!IsAnyAddress(addr)) {
3800 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3801 found = true;
3802 break;
3803 }
3804 }
3805 if (!found) {
3806 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3807 }
3808 }
3809 }
3810
2fe3354d
CH
3811 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3812 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3813 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3814
5cc8371b 3815 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3816 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3817
5b0ddd18 3818 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3819
49a699c4 3820 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3821
08f3f638 3822 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3823
f7c1d4e3 3824 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3825 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3826
3827 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3828 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3829
b3adda56
PD
3830 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3831
b243ca3b 3832 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 3833 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3834 if (g_numWorkerThreads < 1) {
e6a9dde5 3835 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3836 g_numWorkerThreads = 1;
3837 }
3838
b243ca3b 3839 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
3840 g_maxMThreads = ::arg().asNum("max-mthreads");
3841
00b8cadc
RG
3842 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3843
0ec489bf 3844 g_statisticsInterval = ::arg().asNum("statistics-interval");
3845
559b6c93
PL
3846 {
3847 SuffixMatchNode dontThrottleNames;
3848 vector<string> parts;
69eb0665 3849 stringtok(parts, ::arg()["dont-throttle-names"], " ,");
559b6c93
PL
3850 for (const auto &p : parts) {
3851 dontThrottleNames.add(DNSName(p));
3852 }
3853 g_dontThrottleNames.setState(dontThrottleNames);
3854
3855 NetmaskGroup dontThrottleNetmasks;
69eb0665 3856 stringtok(parts, ::arg()["dont-throttle-netmasks"], " ,");
559b6c93
PL
3857 for (const auto &p : parts) {
3858 dontThrottleNetmasks.addMask(Netmask(p));
3859 }
3860 g_dontThrottleNetmasks.setState(dontThrottleNetmasks);
3861 }
3862
144040be 3863 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
078be17f
RG
3864 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
3865 s_balancingFactor = 0.0;
3866 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
3867 }
144040be 3868
810ff705
RG
3869#ifdef SO_REUSEPORT
3870 g_reusePort = ::arg().mustDo("reuseport");
3871#endif
3872
b243ca3b 3873 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 3874
b243ca3b
RG
3875 if (g_reusePort) {
3876 if (g_weDistributeQueries) {
3877 /* first thread is the handler, then distributors */
3878 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3879 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3880 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3881 makeUDPServerSockets(deferredAdds);
adb6cd72 3882 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
3883 }
3884 }
3885 else {
3886 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3887 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3888 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 3889 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 3890 makeUDPServerSockets(deferredAdds);
adb6cd72 3891 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 3892 }
810ff705
RG
3893 }
3894 }
3895 else {
c47f201b 3896 std::set<int> tcpSockets;
b243ca3b
RG
3897 /* we don't have reuseport so we can only open one socket per
3898 listening addr:port and everyone will listen on it */
3899 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
3900 makeTCPServerSockets(g_deferredAdds, tcpSockets);
3901
3902 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
3903 needs to listen to the shared sockets */
3904 if (g_weDistributeQueries) {
3905 /* first thread is the handler, then distributors */
3906 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
3907 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3908 }
3909 }
3910 else {
3911 /* first thread is the handler, there is no distributor here and workers are accepting queries */
3912 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
3913 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
3914 }
3915 }
810ff705 3916 }
815099b2 3917
af1377b7
NC
3918#ifdef NOD_ENABLED
3919 // Setup newly observed domain globals
3920 setupNODGlobal();
3921#endif /* NOD_ENABLED */
3922
677e2a46
BH
3923 int forks;
3924 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3925 if(!fork()) // we are child
3926 break;
3927 }
3ddb9247 3928
f7c1d4e3 3929 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3930 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3931 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
3932 daemonize();
3933 }
3934 signal(SIGUSR1,usr1Handler);
3935 signal(SIGUSR2,usr2Handler);
3936 signal(SIGPIPE,SIG_IGN);
810ff705 3937
a6414fdc 3938 checkOrFixFDS();
3ddb9247 3939
d1b28475
KM
3940#ifdef HAVE_LIBSODIUM
3941 if (sodium_init() == -1) {
e6a9dde5 3942 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
3943 exit(99);
3944 }
3945#endif
3946
3afde9b2
PL
3947 openssl_thread_setup();
3948 openssl_seed();
e97cb679
AT
3949 /* setup rng before chroot */
3950 dns_random_init();
3afde9b2 3951
bdbb07e0 3952 if(::arg()["server-id"].empty()) {
d0983bff 3953 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
3954 }
3955
138435cb
BH
3956 int newgid=0;
3957 if(!::arg()["setgid"].empty())
3958 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3959 int newuid=0;
3960 if(!::arg()["setuid"].empty())
3961 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3962
f1d6a7ce
KM
3963 Utility::dropGroupPrivs(newuid, newgid);
3964
138435cb 3965 if (!::arg()["chroot"].empty()) {
75336810
PL
3966#ifdef HAVE_SYSTEMD
3967 char *ns;
3968 ns = getenv("NOTIFY_SOCKET");
3969 if (ns != nullptr) {
e6a9dde5 3970 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
3971 exit(1);
3972 }
3973#endif
138435cb 3974 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 3975 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
3976 exit(1);
3977 }
f0f3f0b0 3978 else
377602e3 3979 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
3980 }
3981
f0f3f0b0
PL
3982 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3983 if(!s_pidfname.empty())
3984 unlink(s_pidfname.c_str()); // remove possible old pid file
3985 writePid();
3986
3987 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3988
f1d6a7ce 3989 Utility::dropUserPrivs(newuid);
1f2b341e
RG
3990 try {
3991 /* we might still have capabilities remaining, for example if we have been started as root
3992 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
3993 like CAP_NET_BIND_SERVICE.
3994 */
3995 dropCapabilities();
3996 }
3997 catch(const std::exception& e) {
3998 g_log<<Logger::Warning<<e.what()<<endl;
3999 }
c0063e60 4000
e6ec15bf
RG
4001 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4002
49a699c4 4003 makeThreadPipes();
3ddb9247 4004
5d4dd7fe
BH
4005 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4006 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 4007 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 4008 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 4009
c29d820c
RG
4010 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4011
563517f3
RG
4012 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4013 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4014 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4015 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
72259676 4016
d705aad9
RG
4017 if (::arg().mustDo("snmp-agent")) {
4018 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4019 g_snmpAgent->run();
4020 }
4021
b47026fd 4022 int port = ::arg().asNum("udp-source-port-min");
58da9034 4023 if(port < 1024 || port > 65535){
e6a9dde5 4024 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
4025 exit(99); // this isn't going to fix itself either
4026 }
4027 s_minUdpSourcePort = port;
b47026fd 4028 port = ::arg().asNum("udp-source-port-max");
58da9034 4029 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 4030 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
4031 exit(99); // this isn't going to fix itself either
4032 }
4033 s_maxUdpSourcePort = port;
4034 std::vector<string> parts {};
b47026fd 4035 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
4036 for (const auto &part : parts)
4037 {
4038 port = std::stoi(part);
58da9034 4039 if(port < 1024 || port > 65535){
e6a9dde5 4040 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
4041 exit(99); // this isn't going to fix itself either
4042 }
4043 s_avoidUdpSourcePorts.insert(port);
4044 }
4045
b243ca3b 4046 unsigned int currentThreadId = 1;
8fd25133 4047 const auto cpusMap = parseCPUMap();
d77abca1 4048
c3828c03 4049 if(g_numThreads == 1) {
e6a9dde5 4050 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
4051#ifdef HAVE_SYSTEMD
4052 sd_notify(0, "READY=1");
4053#endif
b243ca3b
RG
4054
4055 /* This thread handles the web server, carbon, statistics and the control channel */
4056 auto& handlerInfos = s_threadInfos.at(0);
4057 handlerInfos.isHandler = true;
c390b2da 4058 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
4059
4060 setCPUMap(cpusMap, currentThreadId, pthread_self());
4061
4062 auto& infos = s_threadInfos.at(currentThreadId);
4063 infos.isListener = true;
4064 infos.isWorker = true;
c390b2da 4065 recursorThread(currentThreadId++, "worker");
76698c6e
BH
4066 }
4067 else {
8fd25133 4068
38354396
OM
4069
4070 if (g_weDistributeQueries) {
4071 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4072 auto& infos = s_threadInfos.at(currentThreadId + n);
4073 infos.isListener = true;
4074 }
4075 }
4076 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4077 auto& infos = s_threadInfos.at(currentThreadId + (g_weDistributeQueries ? g_numDistributorThreads : 0) + n);
4078 infos.isListener = !g_weDistributeQueries;
4079 infos.isWorker = true;
4080 }
4081
b243ca3b
RG
4082 if (g_weDistributeQueries) {
4083 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4084 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4085 auto& infos = s_threadInfos.at(currentThreadId);
c390b2da 4086 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
4087 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4088 }
4089 }
8fd25133 4090
62b549e0
RG
4091 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4092
b243ca3b
RG
4093 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4094 auto& infos = s_threadInfos.at(currentThreadId);
c390b2da 4095 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b 4096 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 4097 }
b243ca3b 4098
6b6720de
PL
4099#ifdef HAVE_SYSTEMD
4100 sd_notify(0, "READY=1");
4101#endif
b243ca3b
RG
4102
4103 /* This thread handles the web server, carbon, statistics and the control channel */
4104 auto& infos = s_threadInfos.at(0);
4105 infos.isHandler = true;
c390b2da 4106 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b
RG
4107
4108 s_threadInfos.at(0).thread.join();
bb4bdbaf 4109 }
bb4bdbaf
BH
4110 return 0;
4111}
4112
c390b2da 4113static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
4114try
4115{
d77abca1 4116 t_id=n;
b243ca3b 4117 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
4118
4119 static string threadPrefix = "pdns-r/";
519f5484 4120 setThreadName(threadPrefix + threadName);
c390b2da 4121
49a699c4 4122 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 4123 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 4124 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
4125 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
4126 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 4127 primeHints();
3ddb9247 4128
f26bf547 4129 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 4130
e6a9dde5 4131 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 4132
af1377b7 4133#ifdef NOD_ENABLED
41c542ec
NC
4134 if (threadInfo.isWorker)
4135 setupNODThread();
af1377b7 4136#endif /* NOD_ENABLED */
c1751a59
RG
4137
4138 /* the listener threads handle TCP queries */
4139 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
4140 try {
4141 if(!::arg()["lua-dns-script"].empty()) {
4142 t_pdl = std::make_shared<RecursorLua4>();
4143 t_pdl->loadFile(::arg()["lua-dns-script"]);
4144 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
4145 }
4146 }
4147 catch(std::exception &e) {
4148 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
4149 _exit(99);
674cf0f6 4150 }
674cf0f6 4151 }
3ddb9247 4152
f8f243b0 4153 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 4154 if(ringsize) {
f26bf547 4155 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
4156 if(g_weDistributeQueries)
4157 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 4158 else
3ddb9247 4159 t_remotes->set_capacity(ringsize);
f26bf547 4160 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4161 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
4162 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4163 t_bogusremotes->set_capacity(ringsize);
f26bf547 4164 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 4165 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 4166 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
4167 t_timeouts->set_capacity(ringsize);
92011b8f 4168
f26bf547 4169 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4170 t_queryring->set_capacity(ringsize);
f26bf547 4171 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 4172 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
4173 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
4174 t_bogusqueryring->set_capacity(ringsize);
92011b8f 4175 }
3ddb9247 4176
f26bf547 4177 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
144040be 4178 threadInfo.mt = MT.get();
3ddb9247 4179
63341e8d
RG
4180#ifdef HAVE_PROTOBUF
4181 /* start protobuf export threads if needed */
4182 auto luaconfsLocal = g_luaconfs.getLocal();
4183 checkProtobufExport(luaconfsLocal);
4184 checkOutgoingProtobufExport(luaconfsLocal);
4185#endif /* HAVE_PROTOBUF */
4186
bb4bdbaf
BH
4187 PacketID pident;
4188
4189 t_fdm=getMultiplexer();
d77abca1 4190
b243ca3b 4191 if(threadInfo.isHandler) {
d07bf7ff 4192 if(::arg().mustDo("webserver")) {
e6a9dde5 4193 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 4194 try {
1ce57618 4195 new RecursorWebServer(t_fdm);
8989097d
CH
4196 }
4197 catch(PDNSException &e) {
e6a9dde5 4198 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
4199 exit(99);
4200 }
f3d1d67b 4201 }
377602e3 4202 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 4203 }
810ff705 4204 else {
d77abca1 4205
b243ca3b
RG
4206 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
4207 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
4208
4209 if (threadInfo.isListener) {
4210 if (g_reusePort) {
4211 /* then every listener has its own FDs */
4212 for(const auto deferred : threadInfo.deferredAdds) {
4213 t_fdm->addReadFD(deferred.first, deferred.second);
4214 }
810ff705 4215 }
b243ca3b
RG
4216 else {
4217 /* otherwise all listeners are listening on the same ones */
4218 for(const auto deferred : g_deferredAdds) {
4219 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
4220 }
4221 }
4222 }
810ff705 4223 }
3ddb9247 4224
b0b37121 4225 registerAllStats();
d77abca1 4226
b243ca3b 4227 if(threadInfo.isHandler) {
674cf0f6
BH
4228 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
4229 }
1bc3c142 4230
f7c1d4e3 4231 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 4232
f7c1d4e3 4233 bool listenOnTCP(true);
49a699c4 4234
cb1523d1 4235 time_t last_stat = 0;
a2f87dd1 4236 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 4237 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 4238 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 4239 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 4240 for(;;) {
ac0e821b 4241 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 4242
3427fa8a
BH
4243 if(!(counter%500)) {
4244 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
4245 }
4246
d2392145 4247 if(!(counter%55)) {
d8f6d49f 4248 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 4249 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 4250
f7c1d4e3 4251 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 4252 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 4253 if(g_logCommonErrors)
e6a9dde5 4254 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 4255 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
4256 }
4257 }
3ddb9247 4258
f7c1d4e3
BH
4259 counter++;
4260
b243ca3b 4261 if(threadInfo.isHandler) {
cb1523d1
RG
4262 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
4263 doStats();
4264 last_stat = g_now.tv_sec;
4265 }
f7c1d4e3 4266
cb1523d1 4267 Utility::gettimeofday(&g_now, 0);
2c78bd57 4268
cb1523d1
RG
4269 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
4270 MT->makeThread(doCarbonDump, 0);
4271 last_carbon = g_now.tv_sec;
4272 }
2c78bd57 4273 }
2a0276a9 4274 if (t_pdl != nullptr) {
9adbe790 4275 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
4276 /* remember that the listener threads handle TCP queries */
4277 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
4278 // Only on threads processing queries
4279 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
4280 t_pdl->maintenance();
4281 last_lua_maintenance = g_now.tv_sec;
4282 }
9adbe790 4283 }
a2f87dd1 4284 }
2c78bd57 4285
bb4bdbaf 4286 t_fdm->run(&g_now);
3ea54bf0 4287 // 'run' updates g_now for us
f7c1d4e3 4288
b243ca3b 4289 if(threadInfo.isListener) {
5c889cf5 4290 if(listenOnTCP) {
c47f201b
RG
4291 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
4292 for(const auto fd : threadInfo.tcpSockets) {
4293 t_fdm->removeReadFD(fd);
b243ca3b 4294 }
c47f201b
RG
4295 listenOnTCP=false;
4296 }
f7c1d4e3 4297 }
5c889cf5 4298 else {
c47f201b
RG
4299 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
4300 for(const auto fd : threadInfo.tcpSockets) {
4301 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 4302 }
c47f201b
RG
4303 listenOnTCP=true;
4304 }
f7c1d4e3
BH
4305 }
4306 }
4307 }
4308}
3f81d239 4309catch(PDNSException &ae) {
e6a9dde5 4310 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
4311 return 0;
4312}
4313catch(std::exception &e) {
e6a9dde5 4314 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
4315 return 0;
4316}
4317catch(...) {
e6a9dde5 4318 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
4319 return 0;
4320}
4321
51e2144e 4322
3ddb9247 4323int main(int argc, char **argv)
288f4aa9 4324{
dbd23fc2
BH
4325 g_argc = argc;
4326 g_argv = argv;
5e3de507 4327 g_stats.startupTime=time(0);
b51ef4f9 4328 Utility::srandom();
3e135495 4329 versionSetProduct(ProductRecursor);
8a63d3ce 4330 reportBasicTypes();
0007c2e5 4331 reportOtherTypes();
ea634573 4332
22030c37 4333 int ret = EXIT_SUCCESS;
caa6eefa 4334
288f4aa9 4335 try {
f888311c 4336 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 4337 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 4338 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 4339 ::arg().set("local-port","port to listen on")="53";
32252594 4340 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 4341 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 4342 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 4343 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 4344 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 4345 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 4346 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 4347 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 4348 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 4349 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 4350 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 4351 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
4352 ::arg().set("chroot","switch to chroot jail")="";
4353 ::arg().set("setgid","If set, change group id to this gid for more security")="";
4354 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 4355 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 4356 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 4357 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 4358 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 4359 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 4360 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 4361 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 4362 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
4363 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4364 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4365 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 4366 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
8ca656a8 4367 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
cc08b5a9 4368 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 4369 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 4370 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
4371 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4372 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4373
0ec489bf 4374 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 4375 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 4376 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 4377 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
4378 ::arg().set("socket-owner","Owner of socket")="";
4379 ::arg().set("socket-group","Group of socket")="";
4380 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 4381
f0f3f0b0 4382 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
4383 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4384 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 4385 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 4386 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 4387 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 4388 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 4389 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 4390 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
559b6c93
PL
4391 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4392 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
2e3d8a19 4393 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 4394 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 4395 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
b9473937 4396 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
c3e753c7 4397 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 4398 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 4399 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 4400 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 4401 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 4402 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 4403 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 4404 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 4405 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 4406 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 4407 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 4408 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 4409 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 4410 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 4411 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 4412 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 4413 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 4414
5605c067 4415 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
4416 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4417 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 4418 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 4419 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 4420 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 4421 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 4422 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 4423 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 4424 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 4425 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 4426 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
fd8898fb 4427 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
35695d18 4428 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
fd8898fb 4429 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5cf4b2e7 4430 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
ed9019c9 4431 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
3f975863 4432 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 4433 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 4434 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 4435 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 4436 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 4437 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 4438 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 4439 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 4440 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
4441 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4442 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 4443 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 4444 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 4445 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 4446 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 4447 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
c29d820c 4448 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
ee271fc4 4449 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
a09a8ce0 4450
68e6df3c 4451 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 4452 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 4453
4454 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 4455
d705aad9 4456 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 4457 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 4458
72259676
RG
4459 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
4460 for (size_t idx = 0; idx < 32; idx++) {
4461 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
4462 }
4463 for (size_t idx = 0; idx < 128; idx++) {
4464 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
4465 }
563517f3
RG
4466 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
4467 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
4468 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
4469 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
d705aad9 4470
0735b17e 4471 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 4472 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 4473
8fd25133
RG
4474 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4475
98d36505
RG
4476 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4477
5cc8371b 4478 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 4479 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 4480
58da9034 4481 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
4482 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4483 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 4484 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 4485 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
144040be 4486 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
af1377b7
NC
4487#ifdef NOD_ENABLED
4488 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4489 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4490 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4491 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
4492 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 4493 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4494 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
4495 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4496 ::arg().set("unique-response-log", "Log unique responses")="yes";
4497 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 4498 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 4499 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 4500#endif /* NOD_ENABLED */
2e3d8a19 4501 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 4502 ::arg().setCmd("version","Print version string");
d5141417 4503 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 4504 g_log.toConsole(Logger::Info);
2e3d8a19 4505 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 4506
2d733c0f
CH
4507 string configname=::arg()["config-dir"]+"/recursor.conf";
4508 if(::arg()["config-name"]!="") {
4509 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 4510 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
4511 }
4512 cleanSlashes(configname);
5124de27 4513
5cc1ea1d
CH
4514 if(!::arg().getCommands().empty()) {
4515 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
4516 exit(99);
4517 }
4518
577cf284
BH
4519 if(::arg().mustDo("config")) {
4520 cout<<::arg().configstring()<<endl;
4521 exit(0);
4522 }
4523
3ddb9247 4524 if(!::arg().file(configname.c_str()))
e6a9dde5 4525 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 4526
2e3d8a19 4527 ::arg().parse(argc,argv);
c836dc19 4528
2054afbb
CH
4529 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4530 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
4531 exit(EXIT_FAILURE);
4532 }
4533
4534 if (::arg()["socket-dir"].empty()) {
4535 if (::arg()["chroot"].empty())
4536 ::arg().set("socket-dir") = LOCALSTATEDIR;
4537 else
4538 ::arg().set("socket-dir") = "/";
4539 }
4540
2e3d8a19 4541 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 4542
b243ca3b
RG
4543 if(::arg().asNum("threads")==1) {
4544 if (::arg().mustDo("pdns-distributes-queries")) {
4545 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
4546 ::arg().set("pdns-distributes-queries")="no";
4547 }
4548 }
4549
4550 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4551 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
4552 ::arg().set("distributor-threads")="1";
4553 }
4554
4555 if (!::arg().mustDo("pdns-distributes-queries")) {
4556 ::arg().set("distributor-threads")="0";
4557 }
61d74169 4558
2e3d8a19 4559 if(::arg().mustDo("help")) {
ff5ba4f9
WA
4560 cout<<"syntax:"<<endl<<endl;
4561 cout<<::arg().helpstring(::arg()["help"])<<endl;
4562 exit(0);
b636533b 4563 }
5e3de507 4564 if(::arg().mustDo("version")) {
ba1a571d 4565 showProductVersion();
3613a51c 4566 showBuildConfiguration();
67076869 4567 exit(0);
5e3de507 4568 }
b636533b 4569
34162f8f 4570 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 4571
34162f8f
CH
4572 if (logUrgency < Logger::Error)
4573 logUrgency = Logger::Error;
f48d7b65 4574 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
4575 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
4576 }
e6a9dde5
PL
4577 g_log.setLoglevel(logUrgency);
4578 g_log.toConsole(logUrgency);
34162f8f 4579
f7c1d4e3 4580 serviceMain(argc, argv);
288f4aa9 4581 }
3f81d239 4582 catch(PDNSException &ae) {
e6a9dde5 4583 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 4584 ret=EXIT_FAILURE;
288f4aa9 4585 }
fdbf35ac 4586 catch(std::exception &e) {
e6a9dde5 4587 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 4588 ret=EXIT_FAILURE;
288f4aa9
BH
4589 }
4590 catch(...) {
e6a9dde5 4591 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 4592 ret=EXIT_FAILURE;
288f4aa9 4593 }
3ddb9247 4594
22030c37 4595 return ret;
288f4aa9 4596}