]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Merge pull request #9784 from pieterlexis/dnsdist-spoofaction-one-arg
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
c390b2da 33#include <thread>
519f5484 34#include "threadname.hh"
3ea54bf0 35#include "recpacketcache.hh"
3ddb9247 36#include "utility.hh"
51e2144e 37#include "dns_random.hh"
d1b28475
KM
38#ifdef HAVE_LIBSODIUM
39#include <sodium.h>
40#endif
3afde9b2 41#include "opensslsigners.hh"
288f4aa9
BH
42#include <iostream>
43#include <errno.h>
81859ba5 44#include <boost/static_assert.hpp>
288f4aa9
BH
45#include <map>
46#include <set>
97bb160b 47#include "recursor_cache.hh"
38c9ceaa 48#include "cachecleaner.hh"
288f4aa9 49#include <stdio.h>
c75a6a9e 50#include <signal.h>
288f4aa9 51#include <stdlib.h>
bb4bdbaf 52#include "misc.hh"
288f4aa9
BH
53#include "mtasker.hh"
54#include <utility>
288f4aa9
BH
55#include "arguments.hh"
56#include "syncres.hh"
88def049
BH
57#include <fcntl.h>
58#include <fstream>
3e61e7f7 59#include "sortlist.hh"
5c633640 60#include "sstuff.hh"
dc593046 61#include <boost/any.hpp>
5c633640
BH
62#include <boost/tuple/tuple.hpp>
63#include <boost/tuple/tuple_comparison.hpp>
72df400f 64#include <boost/shared_array.hpp>
7f1fa77d 65#include <boost/function.hpp>
5605c067 66#include <boost/algorithm/string.hpp>
8f7473d7 67#ifdef MALLOC_TRACE
68#include "malloctrace.hh"
69#endif
40a3dd64 70#include <netinet/tcp.h>
f12666f2 71#include "capabilities.hh"
ea634573
BH
72#include "dnsparser.hh"
73#include "dnswriter.hh"
74#include "dnsrecords.hh"
f814d7c8 75#include "zoneparser-tng.hh"
1d5b3ce6 76#include "rec_channel.hh"
aaacf7f2 77#include "logger.hh"
c8ddb7c2 78#include "iputils.hh"
09e6702a 79#include "mplexer.hh"
c038218b 80#include "config.h"
808c5ef7 81#include "lua-recursor4.hh"
ba1a571d 82#include "version.hh"
79332bff 83#include "responsestats.hh"
d67620e4 84#include "secpoll-recursor.hh"
c5c066bf 85#include "dnsname.hh"
644dd1da 86#include "filterpo.hh"
87#include "rpzloader.hh"
b3f0ed10 88#include "validate-recursor.hh"
f3c18728 89#include "rec-lua-conf.hh"
5c3b5e7f 90#include "ednsoptions.hh"
94e2a9b0 91#include "ednsextendederror.hh"
85c7ca75 92#include "gettime.hh"
5216ddcc 93#include "proxy-protocol.hh"
d6f3fcfa 94#include "pubsuffix.hh"
bbec1961 95#include "shuffle.hh"
af1377b7
NC
96#ifdef NOD_ENABLED
97#include "nod.hh"
98#endif /* NOD_ENABLED */
20829585 99#include "query-local-address.hh"
f3c18728 100
d9d3f9c1 101#include "rec-protobuf.hh"
d705aad9 102#include "rec-snmp.hh"
aa7929a3 103
6b6720de
PL
104#ifdef HAVE_SYSTEMD
105#include <systemd/sd-daemon.h>
106#endif
107
d187038c
RG
108#include "namespaces.hh"
109
d61aa945
RG
110#ifdef HAVE_PROTOBUF
111#include "uuid-utils.hh"
a44a8d66 112#include "protozero.hh"
b9fa43e0 113#endif /* HAVE_PROTOBUF */
d61aa945 114
5cc8371b
RG
115#include "xpf.hh"
116
d187038c
RG
117typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
118
f26bf547 119static thread_local std::shared_ptr<RecursorLua4> t_pdl;
b243ca3b 120static thread_local unsigned int t_id = 0;
f26bf547
RG
121static thread_local std::shared_ptr<Regex> t_traceRegex;
122static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d 123#ifdef HAVE_PROTOBUF
3fe06137 124static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_protobufServers{nullptr};
b773359c 125static thread_local uint64_t t_protobufServersGeneration;
3fe06137 126static thread_local std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> t_outgoingProtobufServers{nullptr};
b773359c 127static thread_local uint64_t t_outgoingProtobufServersGeneration;
63341e8d 128#endif /* HAVE_PROTOBUF */
f26bf547 129
b9fa43e0 130#ifdef HAVE_FSTRM
10ba6d01 131static thread_local std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> t_frameStreamServers{nullptr};
b9fa43e0
OM
132static thread_local uint64_t t_frameStreamServersGeneration;
133#endif /* HAVE_FSTRM */
134
f26bf547 135thread_local std::unique_ptr<MT_t> MT; // the big MTasker
ccfadb6c
OM
136std::unique_ptr<MemRecursorCache> g_recCache;
137std::unique_ptr<NegCache> g_negCache;
a7956123 138
f26bf547 139thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 140thread_local FDMultiplexer* t_fdm{nullptr};
be9078b3 141thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
66f2e6ad 142thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring, t_bogusqueryring;
f26bf547 143thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
af1377b7
NC
144#ifdef NOD_ENABLED
145thread_local std::shared_ptr<nod::NODDB> t_nodDBp;
41c542ec 146thread_local std::shared_ptr<nod::UniqueResponseDB> t_udrDBp;
af1377b7 147#endif /* NOD_ENABLED */
d187038c 148__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798 149
dc593046 150typedef vector<pair<int, boost::function< void(int, boost::any&) > > > deferredAdd_t;
b243ca3b 151
d7dae798 152// for communicating with our threads
b243ca3b
RG
153// effectively readonly after startup
154struct RecThreadInfo
155{
156 struct ThreadPipeSet
157 {
158 int writeToThread{-1};
159 int readToThread{-1};
160 int writeFromThread{-1};
161 int readFromThread{-1};
162 int writeQueriesToThread{-1}; // this one is non-blocking
163 int readQueriesToThread{-1};
164 };
165
adb6cd72 166 /* FD corresponding to TCP sockets this thread is listening
c47f201b 167 on.
adb6cd72
RG
168 These FDs are also in deferredAdds when we have one
169 socket per listener, and in g_deferredAdds instead. */
170 std::set<int> tcpSockets;
b243ca3b
RG
171 /* FD corresponding to listening sockets if we have one socket per
172 listener (with reuseport), otherwise all listeners share the
173 same FD and g_deferredAdds is then used instead */
174 deferredAdd_t deferredAdds;
175 struct ThreadPipeSet pipes;
176 std::thread thread;
144040be
RG
177 MT_t* mt{nullptr};
178 uint64_t numberOfDistributedQueries{0};
529925dd 179 int exitCode{0};
b243ca3b
RG
180 /* handle the web server, carbon, statistics and the control channel */
181 bool isHandler{false};
182 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
183 bool isListener{false};
184 /* process queries */
185 bool isWorker{false};
49a699c4 186};
810ff705 187
b243ca3b
RG
188/* first we have the handler thread, t_id == 0 (some other
189 helper threads like SNMP might have t_id == 0 as well)
190 then the distributor threads if any
191 and finally the workers */
192static std::vector<RecThreadInfo> s_threadInfos;
193/* without reuseport, all listeners share the same sockets */
194static deferredAdd_t g_deferredAdds;
faf580f5 195
d187038c
RG
196typedef vector<int> tcpListenSockets_t;
197typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
3ea54bf0 198
d187038c 199static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
2ea1d2c0 200static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism (without actually using sendfromto())
d187038c 201static AtomicCounter counter;
9065eb05 202static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 203static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 204static NetmaskGroup g_XPFAcl;
5216ddcc 205static NetmaskGroup g_proxyProtocolACL;
ef3ee606
RG
206static boost::optional<ComboAddress> g_dns64Prefix{boost::none};
207static DNSName g_dns64PrefixReverse;
5216ddcc 208static size_t g_proxyProtocolMaximumSize;
d187038c 209static size_t g_tcpMaxQueriesPerConn;
a5886e6a 210static size_t s_maxUDPQueriesPerRound;
d187038c
RG
211static uint64_t g_latencyStatSize;
212static uint32_t g_disthashseed;
213static unsigned int g_maxTCPPerClient;
d187038c 214static unsigned int g_maxMThreads;
b243ca3b 215static unsigned int g_numDistributorThreads;
d187038c
RG
216static unsigned int g_numWorkerThreads;
217static int g_tcpTimeout;
218static uint16_t g_udpTruncationThreshold;
59cb4a79 219static uint16_t g_xpfRRCode{0};
d187038c
RG
220static std::atomic<bool> statsWanted;
221static std::atomic<bool> g_quiet;
222static bool g_logCommonErrors;
223static bool g_anyToTcp;
b243ca3b 224static bool g_weDistributeQueries; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
810ff705 225static bool g_reusePort{false};
00b8cadc 226static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 227static time_t g_statisticsInterval;
9065eb05 228static bool g_useIncomingECS;
c29d820c 229static bool g_useKernelTimestamp;
a6f7f5fe 230std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
af1377b7
NC
231#ifdef NOD_ENABLED
232static bool g_nodEnabled;
233static DNSName g_nodLookupDomain;
234static bool g_nodLog;
235static SuffixMatchNode g_nodDomainWL;
ca2526f5 236static std::string g_nod_pbtag;
41c542ec
NC
237static bool g_udrEnabled;
238static bool g_udrLog;
ca2526f5 239static std::string g_udr_pbtag;
af1377b7 240#endif /* NOD_ENABLED */
f097141b 241#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 242static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
243#else
244static std::set<uint16_t> s_avoidUdpSourcePorts;
245#endif
bf6f28ca
CHB
246static uint16_t s_minUdpSourcePort;
247static uint16_t s_maxUdpSourcePort;
144040be 248static double s_balancingFactor;
699bc10a 249static bool s_addExtendedResolutionDNSErrors;
49a699c4 250
b243ca3b 251RecursorControlChannel s_rcc; // only active in the handler thread
d187038c 252RecursorStats g_stats;
2d733c0f 253string s_programname="pdns_recursor";
d187038c 254string s_pidfname;
c1c29961 255bool g_lowercaseOutgoing;
bf19ccfd 256unsigned int g_networkTimeoutMsec;
d187038c
RG
257unsigned int g_numThreads;
258uint16_t g_outgoingEDNSBufsize;
98d36505 259bool g_logRPZChanges{false};
c3828c03 260
559b6c93
PL
261// Used in the Syncres to not throttle certain servers
262GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
263GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
264
12cd44ee 265#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 266#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 267// Bad Nets taken from both:
3ddb9247 268// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 269// and
270// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
271// where such a network may not be considered a valid destination
272#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
273#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 274
d7dae798 275//! used to send information to a newborn mthread
ea634573 276struct DNSComboWriter {
08b02366 277 DNSComboWriter(const std::string& query, const struct timeval& now): d_mdp(true, query), d_now(now), d_query(query)
2749c3fe
RG
278 {
279 }
5cc8371b 280
b502d522 281 DNSComboWriter(const std::string& query, const struct timeval& now, std::unordered_set<std::string>&& policyTags, LuaContext::LuaObject&& data, std::vector<DNSRecord>&& records): d_mdp(true, query), d_now(now), d_query(query), d_policyTags(std::move(policyTags)), d_records(std::move(records)), d_data(std::move(data))
5164bac3
RG
282 {
283 }
284
5cc8371b
RG
285 void setRemote(const ComboAddress& sa)
286 {
287 d_remote=sa;
288 }
289
290 void setSource(const ComboAddress& sa)
ea634573 291 {
5cc8371b 292 d_source=sa;
ea634573
BH
293 }
294
b71b60ee 295 void setLocal(const ComboAddress& sa)
296 {
297 d_local=sa;
298 }
299
5cc8371b
RG
300 void setDestination(const ComboAddress& sa)
301 {
302 d_destination=sa;
303 }
b71b60ee 304
ea634573
BH
305 void setSocket(int sock)
306 {
307 d_socket=sock;
308 }
a1754c6a
BH
309
310 string getRemote() const
311 {
5cc8371b
RG
312 if (d_source == d_remote) {
313 return d_source.toStringWithPort();
314 }
315 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
316 }
317
5216ddcc 318 std::vector<ProxyProtocolValue> d_proxyProtocolValues;
5cc8371b 319 MOADNSParser d_mdp;
c9e9e5e0 320 struct timeval d_now;
5cc8371b
RG
321 /* Remote client, might differ from d_source
322 in case of XPF, in which case d_source holds
323 the IP of the client and d_remote of the proxy
324 */
325 ComboAddress d_remote;
326 ComboAddress d_source;
327 /* Destination address, might differ from
328 d_destination in case of XPF, in which case
329 d_destination holds the IP of the proxy and
330 d_local holds our own. */
331 ComboAddress d_local;
332 ComboAddress d_destination;
aa7929a3
RG
333#ifdef HAVE_PROTOBUF
334 boost::uuids::uuid d_uuid;
67e31ebe 335 string d_requestorId;
590388d2 336 string d_deviceId;
0a6a45c8 337 string d_deviceName;
c29d820c 338 struct timeval d_kernelTimestamp{0,0};
aa7929a3 339#endif
08b02366 340 std::string d_query;
b502d522 341 std::unordered_set<std::string> d_policyTags;
163ed916 342 std::string d_routingTag;
37a919d4 343 std::vector<DNSRecord> d_records;
5164bac3 344 LuaContext::LuaObject d_data;
b40562da 345 EDNSSubnetOpts d_ednssubnet;
5164bac3 346 shared_ptr<TCPConnection> d_tcpConnection;
e95b2a7c
RG
347 boost::optional<uint16_t> d_extendedErrorCode{boost::none};
348 string d_extendedErrorExtra;
37a919d4 349 boost::optional<int> d_rcode{boost::none};
e53b77e2 350 int d_socket{-1};
b673817a 351 unsigned int d_tag{0};
e9f63d47 352 uint32_t d_qhash{0};
70fb28d9
RG
353 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
354 bool d_variable{false};
5164bac3
RG
355 bool d_ecsFound{false};
356 bool d_ecsParsed{false};
37a919d4 357 bool d_followCNAMERecords{false};
406b722e 358 bool d_logResponse{false};
a73da04b 359 bool d_tcp{false};
ea634573
BH
360};
361
06857845
RG
362MT_t* getMT()
363{
364 return MT ? MT.get() : nullptr;
365}
ea634573 366
288f4aa9
BH
367ArgvMap &arg()
368{
369 static ArgvMap theArg;
370 return theArg;
371}
4ef015cd 372
8fb594ba 373unsigned int getRecursorThreadId()
b4015453 374{
30da2030 375 return t_id;
b4015453 376}
09e6702a 377
b243ca3b
RG
378static bool isDistributorThread()
379{
380 if (t_id == 0) {
381 return false;
382 }
383
384 return g_weDistributeQueries && s_threadInfos.at(t_id).isListener;
385}
386
387static bool isHandlerThread()
388{
389 if (t_id == 0) {
390 return true;
391 }
392
393 return s_threadInfos.at(t_id).isHandler;
394}
395
d187038c 396static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 397
308f4c43 398LWResult::Result asendtcp(const string& data, Socket* sock)
5c633640
BH
399{
400 PacketID pident;
401 pident.sock=sock;
402 pident.outMSG=data;
3ddb9247 403
bb4bdbaf 404 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 405 string packet;
5c633640 406
308f4c43
RG
407 int ret = MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
408 if (ret == 0) { //timeout
bb4bdbaf 409 t_fdm->removeWriteFD(sock->getHandle());
308f4c43 410 return LWResult::Result::Timeout;
5c633640 411 }
308f4c43
RG
412 else if (ret == -1) { // error
413 t_fdm->removeWriteFD(sock->getHandle());
414 return LWResult::Result::PermanentError;
50c81227 415 }
308f4c43
RG
416 else if (packet.size() != data.size()) { // main loop tells us what it sent out, or empty in case of an error
417 return LWResult::Result::PermanentError;
418 }
419
420 return LWResult::Result::Success;
5c633640
BH
421}
422
d187038c 423static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 424
308f4c43 425LWResult::Result arecvtcp(string& data, const size_t len, Socket* sock, const bool incompleteOkay)
288f4aa9 426{
50c81227 427 data.clear();
5c633640
BH
428 PacketID pident;
429 pident.sock=sock;
430 pident.inNeeded=len;
825fa717 431 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 432 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 433
308f4c43
RG
434 int ret = MT->waitEvent(pident,&data, g_networkTimeoutMsec);
435 if (ret == 0) {
bb4bdbaf 436 t_fdm->removeReadFD(sock->getHandle());
308f4c43 437 return LWResult::Result::Timeout;
288f4aa9 438 }
308f4c43
RG
439 else if (ret == -1) {
440 t_fdm->removeWriteFD(sock->getHandle());
441 return LWResult::Result::PermanentError;
442 }
443 else if (data.empty()) {// error, EOF or other
444 return LWResult::Result::PermanentError;
50c81227
BH
445 }
446
308f4c43 447 return LWResult::Result::Success;
288f4aa9
BH
448}
449
d187038c 450static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 451{
dc593046 452 PacketID pident=*boost::any_cast<PacketID>(&var);
4465e941 453 char resp[512];
7c77ce63
RG
454 ComboAddress fromaddr;
455 socklen_t addrlen=sizeof(fromaddr);
456
457 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
458 if (fromaddr != pident.remote) {
e6a9dde5 459 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
460
461 }
462
4465e941 463 t_fdm->removeReadFD(fd);
464 if(ret >= 0) {
a683e8bd 465 string data(resp, (size_t) ret);
fba1e944 466 MT->sendEvent(pident, &data);
4465e941 467 }
468 else {
fba1e944 469 string empty;
470 MT->sendEvent(pident, &empty);
04360367 471 // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
4465e941 472 }
473}
fba1e944 474string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 475{
4465e941 476 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
477 s.setNonBlocking();
20829585 478 ComboAddress local = pdns::getQueryLocalAddress(dest.sin4.sin_family, 0);
4465e941 479
480 s.bind(local);
481 s.connect(dest);
4465e941 482 s.send(query);
483
484 PacketID pident;
485 pident.sock=&s;
7c77ce63 486 pident.remote=dest;
4465e941 487 pident.type=0;
fba1e944 488 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 489
490 string data;
fba1e944 491
4465e941 492 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 493
4465e941 494 if(!ret || ret==-1) { // timeout
4465e941 495 t_fdm->removeReadFD(s.getHandle());
496 }
497 else if(data.empty()) {// error, EOF or other
fba1e944 498 // we could special case this
4465e941 499 return data;
500 }
4465e941 501 return data;
502}
503
d187038c 504static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 505
d187038c 506static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
507{
508 uint32_t psize=0;
509 socklen_t len=sizeof(psize);
3ddb9247 510
d7dae798 511 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 512 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 513 return;
d7dae798
BH
514 }
515
a2a81d42
OM
516 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0) {
517 int err = errno;
296ddbfe 518 g_log << Logger::Error << "Unable to raise socket buffer size to " << size << ": " << stringerror(err) << endl;
a2a81d42 519 }
d7dae798
BH
520}
521
522
523static void setSocketReceiveBuffer(int fd, uint32_t size)
524{
525 setSocketBuffer(fd, SO_RCVBUF, size);
526}
527
528static void setSocketSendBuffer(int fd, uint32_t size)
529{
530 setSocketBuffer(fd, SO_SNDBUF, size);
531}
532
533
4ef015cd
BH
534// you can ask this class for a UDP socket to send a query from
535// this socket is not yours, don't even think about deleting it
536// but after you call 'returnSocket' on it, don't assume anything anymore
537class UDPClientSocks
538{
4ef015cd 539 unsigned int d_numsocks;
4ef015cd 540public:
e2642526 541 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
542 {
543 }
544
308f4c43 545 LWResult::Result getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 546 {
308f4c43
RG
547 *fd = makeClientSocket(toaddr.sin4.sin_family);
548 if(*fd < 0) { // temporary error - receive exception otherwise
549 return LWResult::Result::OSLimitError;
550 }
d8f6d49f
BH
551
552 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
553 int err = errno;
a7b68ae7
RG
554 try {
555 closesocket(*fd);
556 }
557 catch(const PDNSException& e) {
e6a9dde5 558 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
559 }
560
308f4c43
RG
561 if (err == ENETUNREACH) { // Seth "My Interfaces Are Like A Yo Yo" Arnold special
562 return LWResult::Result::OSLimitError;
563 }
564
565 return LWResult::Result::PermanentError;
d8f6d49f 566 }
998a4334 567
998a4334 568 d_numsocks++;
308f4c43 569 return LWResult::Result::Success;
4ef015cd
BH
570 }
571
572 // return a socket to the pool, or simply erase it
2bee9b7c 573 void returnSocket(int fd)
4ef015cd 574 {
80baf329 575 try {
2bee9b7c 576 t_fdm->removeReadFD(fd);
80baf329 577 }
2bee9b7c 578 catch(const FDMultiplexerException& e) {
bb4bdbaf 579 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 580 }
2bee9b7c 581
a7b68ae7 582 try {
2bee9b7c 583 closesocket(fd);
a7b68ae7
RG
584 }
585 catch(const PDNSException& e) {
e6a9dde5 586 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 587 }
3ddb9247 588
998a4334 589 --d_numsocks;
4ef015cd 590 }
d8f6d49f 591
2bee9b7c
RG
592private:
593
d8f6d49f 594 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 595 static int makeClientSocket(int family)
d8f6d49f 596 {
2ea1d2c0 597 int ret = socket(family, SOCK_DGRAM, 0); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 598
2ea1d2c0 599 if (ret < 0 && errno == EMFILE) { // this is not a catastrophic error
d8f6d49f 600 return ret;
2ea1d2c0
OM
601 }
602 if (ret < 0) {
603 throw PDNSException("Making a socket for resolver (family = " + std::to_string(family) + "): " + stringerror());
604 }
3ddb9247 605
d2fd2f8c
OM
606 // The loop below runs the body with [tries-1 tries-2 ... 1]. Last iteration with tries == 1 is special: it uses a kernel
607 // allocated UDP port.
608#if !defined( __OpenBSD__)
2ea1d2c0
OM
609 int tries = 10;
610#else
819d97d2 611 int tries = 2; // hit the reliable kernel random case for OpenBSD immediately (because it will match tries==1 below), using sysctl net.inet.udp.baddynamic to exclude ports
2ea1d2c0 612#endif
3aa91c3e 613 ComboAddress sin;
2ea1d2c0
OM
614 while (--tries) {
615 in_port_t port;
3ddb9247 616
d2fd2f8c 617 if (tries == 1) { // last iteration: fall back to kernel 'random'
4957a608 618 port = 0;
2ea1d2c0 619 } else {
bf6f28ca
CHB
620 do {
621 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
2ea1d2c0 622 } while (s_avoidUdpSourcePorts.count(port));
bf6f28ca 623 }
5a38281c 624
2ea1d2c0
OM
625 sin = pdns::getQueryLocalAddress(family, port); // does htons for us
626 if (::bind(ret, reinterpret_cast<struct sockaddr*>(&sin), sin.getSocklen()) >= 0)
4957a608 627 break;
d8f6d49f 628 }
9ec48f21 629
2ea1d2c0 630 if (!tries) {
9ec48f21 631 closesocket(ret);
2ea1d2c0 632 throw PDNSException("Resolver binding to local query client socket on " + sin.toString() + ": " + stringerror());
9ec48f21
RG
633 }
634
635 try {
636 setReceiveSocketErrors(ret, family);
637 setNonBlocking(ret);
638 }
2ea1d2c0 639 catch (...) {
9ec48f21
RG
640 closesocket(ret);
641 throw;
642 }
d8f6d49f
BH
643 return ret;
644 }
49a699c4
BH
645};
646
f26bf547 647static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 648
288f4aa9 649/* these two functions are used by LWRes */
308f4c43
RG
650LWResult::Result asendto(const char *data, size_t len, int flags,
651 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 652{
34801ab1
BH
653
654 PacketID pident;
787e5eab
BH
655 pident.domain = domain;
656 pident.remote = toaddr;
657 pident.type = qtype;
34801ab1 658
027e8f47
RG
659 // see if there is an existing outstanding request we can chain on to, using partial equivalence function looking for the same
660 // query (qname and qtype) to the same host, but with a different message ID
34801ab1
BH
661 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
662
663 for(; chain.first != chain.second; chain.first++) {
c647a254 664 if(chain.first->key.fd > -1 && !chain.first->key.closed) { // don't chain onto existing chained waiter or a chain already processed
e27e91a8 665 /*
4665c31e
BH
666 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
667 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 668 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 669 */
34801ab1
BH
670 chain.first->key.chain.insert(id); // we can chain
671 *fd=-1; // gets used in waitEvent / sendEvent later on
308f4c43 672 return LWResult::Result::Success;
34801ab1
BH
673 }
674 }
675
308f4c43
RG
676 auto ret = t_udpclientsocks->getSocket(toaddr, fd);
677 if (ret != LWResult::Result::Success) {
d8f6d49f 678 return ret;
308f4c43 679 }
34801ab1 680
998a4334
BH
681 pident.fd=*fd;
682 pident.id=id;
3ddb9247 683
bb4bdbaf 684 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
308f4c43 685 ssize_t sent = send(*fd, data, len, 0);
bb4bdbaf 686
5b0ddd18 687 int tmp = errno;
bb4bdbaf 688
308f4c43 689 if (sent < 0) {
49a699c4 690 t_udpclientsocks->returnSocket(*fd);
308f4c43
RG
691 errno = tmp; // this is for logging purposes only
692 return LWResult::Result::PermanentError;
693 }
bb4bdbaf 694
308f4c43 695 return LWResult::Result::Success;
288f4aa9
BH
696}
697
308f4c43
RG
698LWResult::Result arecvfrom(std::string& packet, int flags, const ComboAddress& fromaddr, size_t *d_len,
699 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, const struct timeval* now)
288f4aa9 700{
e73106c0 701 static const unsigned int nearMissLimit = ::arg().asNum("spoof-nearmiss-max");
0d5f0a9f 702
288f4aa9 703 PacketID pident;
4ef015cd 704 pident.fd=fd;
288f4aa9 705 pident.id=id;
0d5f0a9f 706 pident.domain=domain;
787e5eab 707 pident.type = qtype;
996c89cc 708 pident.remote=fromaddr;
b636533b 709
5b0ddd18 710 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 711
9ec48f21 712 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
308f4c43 713 if (ret > 0) {
9ec48f21 714 /* handleUDPServerResponse() will close the socket for us no matter what */
308f4c43
RG
715 if (packet.empty()) { // means "error"
716 return LWResult::Result::PermanentError;
717 }
998a4334 718
a683e8bd 719 *d_len=packet.size();
f128d20d 720
e73106c0 721 if (nearMissLimit > 0 && pident.nearMisses > nearMissLimit) {
027e8f47
RG
722 /* we have received more than nearMissLimit answers on the right IP and port, from the right source (we are using connected sockets),
723 for the correct qname and qtype, but with an unexpected message ID. That looks like a spoofing attempt. */
724 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<nearMissLimit<<") answers with a wrong message ID for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 725 g_stats.spoofCount++;
1a6d9b13 726 return LWResult::Result::Spoofed;
35ce8576 727 }
308f4c43
RG
728
729 return LWResult::Result::Success;
288f4aa9 730 }
09e6702a 731 else {
9ec48f21 732 /* getting there means error or timeout, it's up to us to close the socket */
308f4c43 733 if (fd >= 0) {
49a699c4 734 t_udpclientsocks->returnSocket(fd);
308f4c43 735 }
09e6702a 736 }
308f4c43
RG
737
738 return ret == 0 ? LWResult::Result::Timeout : LWResult::Result::PermanentError;
288f4aa9
BH
739}
740
88def049
BH
741static void writePid(void)
742{
191f2e47 743 if(!::arg().mustDo("write-pid"))
744 return;
18e7758c 745 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 746 if(of)
705f31ae 747 of<< Utility::getpid() <<endl;
a2a81d42
OM
748 else {
749 int err = errno;
750 g_log << Logger::Error << "Writing pid for " << Utility::getpid() << " to " << s_pidfname << " failed: "
296ddbfe 751 << stringerror(err) << endl;
a2a81d42 752 }
88def049
BH
753}
754
c51c551e
OM
755uint16_t TCPConnection::s_maxInFlight;
756
2749c3fe 757TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : data(2, 0), d_remote(addr), d_fd(fd)
3ddb9247
PD
758{
759 ++s_currentConnections;
cd989c87 760 (*t_tcpClientCounts)[d_remote]++;
0e408828 761}
cd989c87
BH
762
763TCPConnection::~TCPConnection()
0e408828 764{
a7b68ae7
RG
765 try {
766 if(closesocket(d_fd) < 0)
e6a9dde5 767 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
768 }
769 catch(const PDNSException& e) {
e6a9dde5 770 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
771 }
772
3ddb9247 773 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 774 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 775 --s_currentConnections;
0e408828 776}
0e9d9ce2 777
3ddb9247 778AtomicCounter TCPConnection::s_currentConnections;
d187038c 779
67552030
RG
780static void terminateTCPConnection(int fd)
781{
782 try {
783 t_fdm->removeReadFD(fd);
784 }
785 catch (const FDMultiplexerException& fde)
786 {
787 }
788}
789
28647bcf
RG
790static bool sendResponseOverTCP(const std::unique_ptr<DNSComboWriter>& dc, const std::vector<uint8_t>& packet)
791{
792 char buf[2];
793 buf[0] = packet.size() / 256;
794 buf[1] = packet.size() % 256;
795
796 Utility::iovec iov[2];
797 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
798 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
799
800 int wret = Utility::writev(dc->d_socket, iov, 2);
801 bool hadError = true;
802
803 if (wret == 0) {
804 g_log<<Logger::Warning<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
805 } else if (wret < 0 ) {
806 int err = errno;
807 g_log << Logger::Warning << "Error writing TCP answer to " << dc->getRemote() << ": " << strerror(err) << endl;
808 } else if ((unsigned int)wret != 2 + packet.size()) {
809 g_log<<Logger::Warning<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
810 } else {
811 hadError = false;
812 }
813
814 return hadError;
815}
816
817static void sendErrorOverTCP(std::unique_ptr<DNSComboWriter>& dc, int rcode)
818{
819 std::vector<uint8_t> packet;
820 if (dc->d_mdp.d_header.qdcount == 0) {
821 /* header-only */
822 packet.resize(sizeof(dnsheader));
823 }
824 else {
825 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
826 if (dc->d_mdp.hasEDNS()) {
827 /* we try to add the EDNS OPT RR even for truncated answers,
828 as rfc6891 states:
829 "The minimal response MUST be the DNS header, question section, and an
830 OPT record. This MUST also occur when a truncated response (using
831 the DNS header's TC bit) is returned."
832 */
833 pw.addOpt(512, 0, 0);
834 pw.commit();
835 }
836 }
837
4bb9b886 838 dnsheader& header = reinterpret_cast<dnsheader&>(packet.at(0));
28647bcf
RG
839 header.aa = 0;
840 header.ra = 1;
841 header.qr = 1;
842 header.tc = 0;
843 header.id = dc->d_mdp.d_header.id;
844 header.rd = dc->d_mdp.d_header.rd;
845 header.cd = dc->d_mdp.d_header.cd;
846 header.rcode = rcode;
847
848 sendResponseOverTCP(dc, packet);
849}
850
d187038c 851static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 852
92011b8f 853// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 854static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 855{
92011b8f 856 if(packetsize > 1000 && t_largeanswerremotes)
857 t_largeanswerremotes->push_back(remote);
2cc13433
BH
858 switch(res) {
859 case RCode::ServFail:
92011b8f 860 if(t_servfailremotes) {
861 t_servfailremotes->push_back(remote);
5af86fdc 862 if(query && t_servfailqueryring) // packet cache
92011b8f 863 t_servfailqueryring->push_back(make_pair(*query, qtype));
864 }
2cc13433
BH
865 g_stats.servFails++;
866 break;
867 case RCode::NXDomain:
868 g_stats.nxDomains++;
869 break;
870 case RCode::NoError:
871 g_stats.noErrors++;
872 break;
873 }
874}
875
9a864da4 876static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
a903b39c 877try
878{
5cc8371b 879 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 880}
881catch(...)
882{
883 return "Exception making error message for exception";
884}
885
aa7929a3 886#ifdef HAVE_PROTOBUF
b502d522 887static void protobufLogQuery(uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::unordered_set<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId, const std::string& deviceName)
aa7929a3 888{
b773359c
RG
889 if (!t_protobufServers) {
890 return;
891 }
892
e1c8a4bb 893 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
79816288 894 ComboAddress requestor = requestorNM.getMaskedNetwork();
5d2e9a83 895 requestor.setPort(remote.getPort());
ac10822e 896
83b261a7 897 pdns::ProtoZero::Message m{128, std::string::size_type(policyTags.empty() ? 0 : 64)}; // It's a guess
d58249ee 898 m.setRequest(uniqueId, requestor, local, qname, qtype, qclass, id, tcp, len);
a44a8d66
OM
899 m.setServerIdentity(SyncRes::s_serverID);
900 m.setEDNSSubnet(ednssubnet, ednssubnet.isIPv4() ? maskV4 : maskV6);
901 m.setRequestorId(requestorId);
902 m.setDeviceId(deviceId);
903 m.setDeviceName(deviceName);
02b47f43 904
02b47f43 905 if (!policyTags.empty()) {
47a6825e 906 m.addPolicyTags(policyTags);
02b47f43 907 }
aa7929a3 908
47a6825e 909 std::string msg(m.finishAndMoveBuf());
b773359c 910 for (auto& server : *t_protobufServers) {
ac10822e 911 server->queueData(msg);
b773359c 912 }
aa7929a3
RG
913}
914
ac10822e 915static void protobufLogResponse(pdns::ProtoZero::Message& message)
aa7929a3 916{
b773359c
RG
917 if (!t_protobufServers) {
918 return;
919 }
920
47a6825e 921 std::string msg(message.finishAndMoveBuf());
b773359c 922 for (auto& server : *t_protobufServers) {
ac10822e 923 server->queueData(msg);
b773359c 924 }
aa7929a3
RG
925}
926#endif
927
53508135
PL
928/**
929 * Chases the CNAME provided by the PolicyCustom RPZ policy.
930 *
931 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
932 * @param qtype: The QType of the original query
933 * @param sr: A SyncRes
934 * @param res: An integer that will contain the RCODE of the lookup we do
935 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
936 */
d187038c 937static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
938{
939 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
940 bool oldWantsRPZ = sr.getWantsRPZ();
941 sr.setWantsRPZ(false);
53508135 942 vector<DNSRecord> ans;
6da513b2 943 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
53508135
PL
944 for (const auto& rec : ans) {
945 if(rec.d_place == DNSResourceRecord::ANSWER) {
946 ret.push_back(rec);
947 }
948 }
949 // Reset the RPZ state of the SyncRes
30ee601a 950 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
951 }
952}
953
70fb28d9 954static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 955{
70fb28d9 956 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
957
958 if(rec.d_type != QType::OPT) // their TTL ain't real
959 minTTL = min(minTTL, rec.d_ttl);
960
961 rec.d_content->toPacket(pw);
962 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
963 pw.rollback();
964 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
965 pw.getHeader()->tc=1;
966 pw.truncate();
967 }
968 return false;
969 }
970
971 return true;
972}
973
d4f08082
RG
/* Outcome of applying a filtering policy to a query. */
enum class PolicyResult : uint8_t
{
  NoAction,   // the policy did not produce an answer
  HaveAnswer, // the policy produced the answer (rcode and records)
  Drop        // the query should be dropped
};
975
2b1309a8 976static PolicyResult handlePolicyHit(const DNSFilterEngine::Policy& appliedPolicy, const std::unique_ptr<DNSComboWriter>& dc, SyncRes& sr, int& res, vector<DNSRecord>& ret, DNSPacketWriter& pw)
d4f08082
RG
977{
978 /* don't account truncate actions for TCP queries, since they are not applied */
979 if (appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::Truncate || !dc->d_tcp) {
980 ++g_stats.policyResults[appliedPolicy.d_kind];
981 }
982
b3e25e9e
OM
983 if (sr.doLog() && appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
984 g_log << Logger::Warning << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype).getName() << appliedPolicy.getLogString() << endl;
985 }
986
af231ceb
RG
987 if (appliedPolicy.d_zoneData && appliedPolicy.d_zoneData->d_extendedErrorCode) {
988 dc->d_extendedErrorCode = *appliedPolicy.d_zoneData->d_extendedErrorCode;
989 dc->d_extendedErrorExtra = appliedPolicy.d_zoneData->d_extendedErrorExtra;
990 }
991
d4f08082
RG
992 switch (appliedPolicy.d_kind) {
993
994 case DNSFilterEngine::PolicyKind::NoAction:
995 return PolicyResult::NoAction;
996
997 case DNSFilterEngine::PolicyKind::Drop:
998 ++g_stats.policyDrops;
999 return PolicyResult::Drop;
1000
1001 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1002 ret.clear();
1003 res = RCode::NXDomain;
1004 return PolicyResult::HaveAnswer;
1005
1006 case DNSFilterEngine::PolicyKind::NODATA:
1007 ret.clear();
1008 res = RCode::NoError;
1009 return PolicyResult::HaveAnswer;
1010
1011 case DNSFilterEngine::PolicyKind::Truncate:
1012 if (!dc->d_tcp) {
1013 ret.clear();
1014 res = RCode::NoError;
1015 pw.getHeader()->tc = 1;
1016 return PolicyResult::HaveAnswer;
1017 }
1018 return PolicyResult::NoAction;
1019
1020 case DNSFilterEngine::PolicyKind::Custom:
1021 res = RCode::NoError;
1022 {
1023 auto spoofed = appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1024 for (auto& dr : spoofed) {
1025 ret.push_back(dr);
1026 try {
1027 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
1028 }
1029 catch (const ImmediateServFailException& e) {
1030 if (g_logCommonErrors) {
1031 g_log << Logger::Notice << "Sending SERVFAIL to " << dc->getRemote() << " during resolve of the custom filter policy '" << appliedPolicy.getName() << "' while resolving '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
1032 }
1033 res = RCode::ServFail;
1034 break;
1035 }
1036 catch (const PolicyHitException& e) {
1037 if (g_logCommonErrors) {
1038 g_log << Logger::Notice << "Sending SERVFAIL to " << dc->getRemote() << " during resolve of the custom filter policy '" << appliedPolicy.getName() << "' while resolving '" << dc->d_mdp.d_qname << "' because another RPZ policy was hit" << endl;
1039 }
1040 res = RCode::ServFail;
1041 break;
1042 }
1043 }
1044
1045 return PolicyResult::HaveAnswer;
1046 }
1047 }
1048
1049 return PolicyResult::NoAction;
1050}
1051
63341e8d 1052#ifdef HAVE_PROTOBUF
3fe06137 1053static std::shared_ptr<std::vector<std::unique_ptr<RemoteLogger>>> startProtobufServers(const ProtobufExportConfig& config)
63341e8d 1054{
3fe06137 1055 auto result = std::make_shared<std::vector<std::unique_ptr<RemoteLogger>>>();
b773359c
RG
1056
1057 for (const auto& server : config.servers) {
1058 try {
5d6c7a46
RG
1059 auto logger = make_unique<RemoteLogger>(server, config.timeout, 100*config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
1060 logger->setLogQueries(config.logQueries);
1061 logger->setLogResponses(config.logResponses);
1062 result->emplace_back(std::move(logger));
b773359c
RG
1063 }
1064 catch(const std::exception& e) {
1065 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.what()<<endl;
1066 }
1067 catch(const PDNSException& e) {
1068 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<server<<": "<<e.reason<<endl;
1069 }
63341e8d
RG
1070 }
1071
1072 return result;
1073}
1074
1075static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1076{
1077 if (!luaconfsLocal->protobufExportConfig.enabled) {
b773359c
RG
1078 if (t_protobufServers) {
1079 for (auto& server : *t_protobufServers) {
1080 server->stop();
1081 }
1082 t_protobufServers.reset();
63341e8d
RG
1083 }
1084
1085 return false;
1086 }
1087
1088 /* if the server was not running, or if it was running according to a
1089 previous configuration */
b773359c
RG
1090 if (!t_protobufServers ||
1091 t_protobufServersGeneration < luaconfsLocal->generation) {
63341e8d 1092
b773359c
RG
1093 if (t_protobufServers) {
1094 for (auto& server : *t_protobufServers) {
1095 server->stop();
1096 }
63341e8d 1097 }
b773359c 1098 t_protobufServers.reset();
63341e8d 1099
b773359c
RG
1100 t_protobufServers = startProtobufServers(luaconfsLocal->protobufExportConfig);
1101 t_protobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
1102 }
1103
1104 return true;
1105}
1106
1107static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1108{
1109 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
b773359c
RG
1110 if (t_outgoingProtobufServers) {
1111 for (auto& server : *t_outgoingProtobufServers) {
1112 server->stop();
1113 }
63341e8d 1114 }
b773359c 1115 t_outgoingProtobufServers.reset();
63341e8d
RG
1116
1117 return false;
1118 }
1119
1120 /* if the server was not running, or if it was running according to a
1121 previous configuration */
b773359c
RG
1122 if (!t_outgoingProtobufServers ||
1123 t_outgoingProtobufServersGeneration < luaconfsLocal->generation) {
63341e8d 1124
b773359c
RG
1125 if (t_outgoingProtobufServers) {
1126 for (auto& server : *t_outgoingProtobufServers) {
1127 server->stop();
1128 }
63341e8d 1129 }
b773359c 1130 t_outgoingProtobufServers.reset();
63341e8d 1131
b773359c
RG
1132 t_outgoingProtobufServers = startProtobufServers(luaconfsLocal->outgoingProtobufExportConfig);
1133 t_outgoingProtobufServersGeneration = luaconfsLocal->generation;
63341e8d
RG
1134 }
1135
1136 return true;
1137}
b9fa43e0
OM
1138
1139#ifdef HAVE_FSTRM
1140
10ba6d01 1141static std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>> startFrameStreamServers(const FrameStreamExportConfig& config)
b9fa43e0 1142{
10ba6d01 1143 auto result = std::make_shared<std::vector<std::unique_ptr<FrameStreamLogger>>>();
b9fa43e0
OM
1144
1145 for (const auto& server : config.servers) {
1146 try {
573f4ff0
OM
1147 std::unordered_map<string,unsigned> options;
1148 options["bufferHint"] = config.bufferHint;
1149 options["flushTimeout"] = config.flushTimeout;
1150 options["inputQueueSize"] = config.inputQueueSize;
1151 options["outputQueueSize"] = config.outputQueueSize;
1152 options["queueNotifyThreshold"] = config.queueNotifyThreshold;
1153 options["reopenInterval"] = config.reopenInterval;
dea8a6bc
OM
1154 FrameStreamLogger *fsl = nullptr;
1155 try {
1156 ComboAddress address(server);
1157 fsl = new FrameStreamLogger(address.sin4.sin_family, address.toStringWithPort(), true, options);
1158 }
1159 catch (const PDNSException& e) {
1160 fsl = new FrameStreamLogger(AF_UNIX, server, true, options);
1161 }
573f4ff0
OM
1162 fsl->setLogQueries(config.logQueries);
1163 fsl->setLogResponses(config.logResponses);
1164 result->emplace_back(fsl);
b9fa43e0
OM
1165 }
1166 catch(const std::exception& e) {
1167 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.what()<<endl;
1168 }
1169 catch(const PDNSException& e) {
1170 g_log<<Logger::Error<<"Error while starting dnstap framestream logger to '"<<server<<": "<<e.reason<<endl;
1171 }
1172 }
1173
1174 return result;
1175}
1176
1177static bool checkFrameStreamExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
1178{
1179 if (!luaconfsLocal->frameStreamExportConfig.enabled) {
1180 if (t_frameStreamServers) {
1181 // dt's take care of cleanup
1182 t_frameStreamServers.reset();
1183 }
1184
1185 return false;
1186 }
1187
1188 /* if the server was not running, or if it was running according to a
1189 previous configuration */
1190 if (!t_frameStreamServers ||
1191 t_frameStreamServersGeneration < luaconfsLocal->generation) {
1192
1193 if (t_frameStreamServers) {
1194 // dt's take care of cleanup
1195 t_frameStreamServers.reset();
1196 }
1197
1198 t_frameStreamServers = startFrameStreamServers(luaconfsLocal->frameStreamExportConfig);
1199 t_frameStreamServersGeneration = luaconfsLocal->generation;
1200 }
1201
1202 return true;
1203}
1204#endif /* HAVE_FSTRM */
63341e8d
RG
1205#endif /* HAVE_PROTOBUF */
1206
af1377b7 1207#ifdef NOD_ENABLED
41c542ec 1208static bool nodCheckNewDomain(const DNSName& dname)
af1377b7 1209{
41c542ec 1210 bool ret = false;
af1377b7
NC
1211 // First check the (sub)domain isn't whitelisted for NOD purposes
1212 if (!g_nodDomainWL.check(dname)) {
ef2ea4bf 1213 // Now check the NODDB (note this is probabilistic so can have FNs/FPs)
af1377b7
NC
1214 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
1215 if (g_nodLog) {
1216 // This should probably log to a dedicated log file
3f4073ec 1217 g_log<<Logger::Notice<<"Newly observed domain nod="<<dname<<endl;
af1377b7 1218 }
41c542ec 1219 ret = true;
af1377b7
NC
1220 }
1221 }
41c542ec 1222 return ret;
af1377b7
NC
1223}
1224
3bd27e4a
RG
1225static void sendNODLookup(const DNSName& dname)
1226{
1227 if (!(g_nodLookupDomain.isRoot())) {
1228 // Send a DNS A query to <domain>.g_nodLookupDomain
1229 static const QType qt(QType::A);
1230 static const uint16_t qc(QClass::IN);
5e7d1450
PD
1231 DNSName qname;
1232 try {
1233 qname = dname + g_nodLookupDomain;
1234 }
1235 catch(const std::range_error &e) {
1236 ++g_stats.nodLookupsDroppedOversize;
1237 return;
1238 }
3bd27e4a
RG
1239 vector<DNSRecord> dummy;
1240 directResolve(qname, qt, qc, dummy);
1241 }
1242}
1243
41c542ec
NC
1244static bool udrCheckUniqueDNSRecord(const DNSName& dname, uint16_t qtype, const DNSRecord& record)
1245{
1246 bool ret = false;
1247 if (record.d_place == DNSResourceRecord::ANSWER ||
1248 record.d_place == DNSResourceRecord::ADDITIONAL) {
1249 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1250 std::stringstream ss;
1251 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
1252 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
ff4d391d
NC
1253 if (g_udrLog) {
1254 // This should also probably log to a dedicated file.
3f4073ec 1255 g_log<<Logger::Notice<<"Unique response observed: qname="<<dname<<" qtype="<<QType(qtype).getName()<< " rrtype=" << QType(record.d_type).getName() << " rrname=" << record.d_name << " rrcontent=" << record.d_content->getZoneRepresentation() << endl;
41c542ec
NC
1256 }
1257 ret = true;
1258 }
1259 }
1260 return ret;
1261}
af1377b7
NC
1262#endif /* NOD_ENABLED */
1263
37a919d4
RG
1264int followCNAMERecords(vector<DNSRecord>& ret, const QType& qtype)
1265{
1266 vector<DNSRecord> resolved;
1267 DNSName target;
1268 for(const DNSRecord& rr : ret) {
1269 if(rr.d_type == QType::CNAME) {
1270 auto rec = getRR<CNAMERecordContent>(rr);
1271 if(rec) {
1272 target=rec->getTarget();
1273 break;
1274 }
1275 }
1276 }
1277
1278 if(target.empty()) {
1279 return 0;
1280 }
1281
1282 int rcode = directResolve(target, qtype, QClass::IN, resolved);
1283
1284 for(DNSRecord& rr : resolved) {
1285 ret.push_back(std::move(rr));
1286 }
1287 return rcode;
1288}
1289
ef3ee606
RG
1290int getFakeAAAARecords(const DNSName& qname, ComboAddress prefix, vector<DNSRecord>& ret)
1291{
acc97511
RG
1292 /* we pass a separate vector of records because we will be resolving the initial qname
1293 again, possibly encountering the same CNAME(s), and we don't want to trigger the CNAME
1294 loop detection. */
1295 vector<DNSRecord> newRecords;
1296 int rcode = directResolve(qname, QType(QType::A), QClass::IN, newRecords);
1297
1298 ret.reserve(ret.size() + newRecords.size());
1299 for (auto& record : newRecords) {
1300 ret.push_back(std::move(record));
1301 }
ef3ee606
RG
1302
1303 // Remove double CNAME records
1304 std::set<DNSName> seenCNAMEs;
1305 ret.erase(std::remove_if(
1306 ret.begin(),
1307 ret.end(),
1308 [&seenCNAMEs](DNSRecord& rr) {
1309 if (rr.d_type == QType::CNAME) {
1310 auto target = getRR<CNAMERecordContent>(rr);
1311 if (target == nullptr) {
1312 return false;
1313 }
1314 if (seenCNAMEs.count(target->getTarget()) > 0) {
1315 // We've had this CNAME before, remove it
1316 return true;
1317 }
1318 seenCNAMEs.insert(target->getTarget());
1319 }
1320 return false;
1321 }),
1322 ret.end());
1323
1324 bool seenA = false;
1325 for (DNSRecord& rr : ret) {
1326 if (rr.d_type == QType::A && rr.d_place == DNSResourceRecord::ANSWER) {
1327 if (auto rec = getRR<ARecordContent>(rr)) {
1328 ComboAddress ipv4(rec->getCA());
75e31a0b 1329 memcpy(&prefix.sin6.sin6_addr.s6_addr[12], &ipv4.sin4.sin_addr.s_addr, sizeof(ipv4.sin4.sin_addr.s_addr));
ef3ee606
RG
1330 rr.d_content = std::make_shared<AAAARecordContent>(prefix);
1331 rr.d_type = QType::AAAA;
1332 }
1333 seenA = true;
1334 }
1335 }
1336
1337 if (seenA) {
1338 // We've seen an A in the ANSWER section, so there is no need to keep any
1339 // SOA in the AUTHORITY section as this is not a NODATA response.
1340 ret.erase(std::remove_if(
1341 ret.begin(),
1342 ret.end(),
1343 [](DNSRecord& rr) {
1344 return (rr.d_type == QType::SOA && rr.d_place == DNSResourceRecord::AUTHORITY);
1345 }),
1346 ret.end());
1347 }
1348 return rcode;
1349}
1350
1351int getFakePTRRecords(const DNSName& qname, vector<DNSRecord>& ret)
1352{
1353 /* qname has a reverse ordered IPv6 address, need to extract the underlying IPv4 address from it
1354 and turn it into an IPv4 in-addr.arpa query */
1355 ret.clear();
1356 vector<string> parts = qname.getRawLabels();
1357
1358 if (parts.size() < 8) {
1359 return -1;
1360 }
1361
1362 string newquery;
1363 for (int n = 0; n < 4; ++n) {
1364 newquery +=
1365 std::to_string(stoll(parts[n*2], 0, 16) + 16*stoll(parts[n*2+1], 0, 16));
1366 newquery.append(1, '.');
1367 }
1368 newquery += "in-addr.arpa.";
1369
1370 DNSRecord rr;
1371 rr.d_name = qname;
1372 rr.d_type = QType::CNAME;
1373 rr.d_content = std::make_shared<CNAMERecordContent>(newquery);
1374 ret.push_back(rr);
1375
1376 int rcode = directResolve(DNSName(newquery), QType(QType::PTR), QClass::IN, ret);
1377
1378 return rcode;
1379}
1380
d187038c 1381static void startDoResolve(void *p)
288f4aa9 1382{
9a864da4 1383 auto dc=std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
288f4aa9 1384 try {
5af86fdc
RG
1385 if (t_queryring)
1386 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 1387
32015748 1388 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 1389 EDNSOpts edo;
5164bac3 1390 std::vector<pair<uint16_t, string> > ednsOpts;
eb9444be 1391 bool variableAnswer = dc->d_variable;
8e079f3a 1392 bool haveEDNS=false;
ca2526f5
NC
1393#ifdef NOD_ENABLED
1394 bool hasUDR = false;
1395#endif /* NOD_ENABLED */
f1db0de2
PL
1396 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
1397 uint8_t ednsExtRCode = 0;
8e079f3a 1398 if(getEDNSOpts(dc->d_mdp, &edo)) {
f1db0de2
PL
1399 haveEDNS=true;
1400 if (edo.d_version != 0) {
1401 ednsExtRCode = ERCode::BADVERS;
1402 }
1403
32015748
RG
1404 if(!dc->d_tcp) {
1405 /* rfc6891 6.2.3:
1406 "Values lower than 512 MUST be treated as equal to 512."
1407 */
1408 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1409 }
5164bac3 1410 ednsOpts = edo.d_options;
3af35968 1411 maxanswersize -= 11; // EDNS header size
b40562da 1412
1f691b94
PL
1413 for (const auto& o : edo.d_options) {
1414 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
1415 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
1416 } else if (o.first == EDNSOptionCode::NSID) {
f1db0de2 1417 const static string mode_server_id = ::arg()["server-id"];
f49ae8cf
RG
1418 if (mode_server_id != "disabled" && !mode_server_id.empty() &&
1419 maxanswersize > (EDNSOptionCodeSize + EDNSOptionLengthSize + mode_server_id.size())) {
f1db0de2
PL
1420 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::NSID, mode_server_id));
1421 variableAnswer = true; // Can't packetcache an answer with NSID
f49ae8cf 1422 maxanswersize -= EDNSOptionCodeSize + EDNSOptionLengthSize + mode_server_id.size();
f1db0de2 1423 }
b40562da
RG
1424 }
1425 }
10321a98 1426 }
b40562da
RG
1427 /* perhaps there was no EDNS or no ECS but by now we looked */
1428 dc->d_ecsParsed = true;
e325f20c 1429 vector<DNSRecord> ret;
ea634573 1430 vector<uint8_t> packet;
b23b8614 1431
ad42489c 1432 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
1433 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1434 bool wantsRPZ(true);
ac10822e 1435 RecursorPacketCache::OptPBData pbDataForCache;
aa7929a3 1436#ifdef HAVE_PROTOBUF
83b261a7 1437 pdns::ProtoZero::Message pbMessage;
63341e8d 1438 if (checkProtobufExport(luaconfsLocal)) {
83b261a7
OM
1439 pbMessage.reserve(128, 128); // It's a bit of a guess...
1440 pbMessage.setResponse(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
1441 pbMessage.setServerIdentity(SyncRes::s_serverID);
a44a8d66
OM
1442
1443 // RRSets added below
d9d3f9c1
RG
1444 }
1445#endif /* HAVE_PROTOBUF */
ad42489c 1446
b9fa43e0
OM
1447#ifdef HAVE_FSTRM
1448 checkFrameStreamExport(luaconfsLocal);
1449#endif
1450
3ddb9247 1451 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
1452
1453 pw.getHeader()->aa=0;
1454 pw.getHeader()->ra=1;
c154c8a4 1455 pw.getHeader()->qr=1;
bb4bdbaf 1456 pw.getHeader()->tc=0;
ea634573 1457 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 1458 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 1459 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 1460
70fb28d9
RG
1461 /* This is the lowest TTL seen in the records of the response,
1462 so we can't cache it for longer than this value.
1463 If we have a TTL cap, this value can't be larger than the
1464 cap no matter what. */
1465 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
1466
1467 SyncRes sr(dc->d_now);
37a919d4 1468 sr.setId(MT->getTid());
0c43f455 1469
2e921ec6 1470 bool DNSSECOK=false;
3457a2a0 1471 if(t_pdl) {
f26bf547 1472 sr.setLuaEngine(t_pdl);
3457a2a0 1473 }
9eec8c98 1474 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 1475 sr.setDoDNSSEC(true);
9eec8c98
PL
1476
1477 // Does the requestor want DNSSEC records?
d6c335ab 1478 if(edo.d_extFlags & EDNSOpts::DNSSECOK) {
9eec8c98
PL
1479 DNSSECOK=true;
1480 g_stats.dnssecQueries++;
1481 }
88c33dca
RG
1482 if (dc->d_mdp.d_header.cd) {
1483 /* Per rfc6840 section 5.9, "When processing a request with
1484 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1485 to return all response data, even data that has failed DNSSEC
1486 validation. */
1487 ++g_stats.dnssecCheckDisabledQueries;
1488 }
1489 if (dc->d_mdp.d_header.ad) {
1490 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1491 indicating that the requester understands and is interested in the
1492 value of the AD bit in the response. This allows a requester to
1493 indicate that it understands the AD bit without also requesting
1494 DNSSEC data via the DO bit. */
1495 ++g_stats.dnssecAuthenticDataQueries;
1496 }
9eec8c98
PL
1497 } else {
1498 // Ignore the client-set CD flag
1499 pw.getHeader()->cd=0;
5b9853c9 1500 }
0c43f455
RG
1501 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
1502
4898a348 1503#ifdef HAVE_PROTOBUF
30ee601a 1504 sr.setInitialRequestId(dc->d_uuid);
b773359c 1505 sr.setOutgoingProtobufServers(t_outgoingProtobufServers);
4898a348 1506#endif
b9fa43e0
OM
1507#ifdef HAVE_FSTRM
1508 sr.setFrameStreamServers(t_frameStreamServers);
1509#endif
2fe3354d 1510 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
d4f08082 1511 sr.setQueryReceivedOverTCP(dc->d_tcp);
57769f13 1512
904d3219 1513 bool tracedQuery=false; // we could consider letting Lua know about this too
9fc36e90 1514 bool shouldNotValidate = false;
904d3219 1515
ef3b6cd7
RG
1516 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1517 int res = RCode::NoError;
37a919d4 1518
1f1ca368 1519 DNSFilterEngine::Policy appliedPolicy;
406b722e 1520 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, dc->d_logResponse);
d6c335ab 1521 dq.ednsFlags = &edo.d_extFlags;
5164bac3 1522 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
1523 dq.tag = dc->d_tag;
1524 dq.discardedPolicies = &sr.d_discardedPolicies;
1525 dq.policyTags = &dc->d_policyTags;
1526 dq.appliedPolicy = &appliedPolicy;
1527 dq.currentRecords = &ret;
1528 dq.dh = &dc->d_mdp.d_header;
05c74122 1529 dq.data = dc->d_data;
67e31ebe
RG
1530#ifdef HAVE_PROTOBUF
1531 dq.requestorId = dc->d_requestorId;
590388d2 1532 dq.deviceId = dc->d_deviceId;
0a6a45c8 1533 dq.deviceName = dc->d_deviceName;
67e31ebe 1534#endif
38d8b937 1535 dq.proxyProtocolValues = &dc->d_proxyProtocolValues;
e95b2a7c
RG
1536 dq.extendedErrorCode = &dc->d_extendedErrorCode;
1537 dq.extendedErrorExtra = &dc->d_extendedErrorExtra;
ba21fcfe 1538
6cf96227
PL
1539 if(ednsExtRCode != 0) {
1540 goto sendit;
1541 }
1542
e661a20b 1543 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
1544 pw.getHeader()->tc = 1;
1545 res = 0;
1546 variableAnswer = true;
e661a20b
PD
1547 goto sendit;
1548 }
1549
f26bf547 1550 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
1551 sr.setLogMode(SyncRes::Store);
1552 tracedQuery=true;
1553 }
3ddb9247 1554
976ec823 1555 if(!g_quiet || tracedQuery) {
e6a9dde5 1556 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 1557 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 1558 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 1559 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 1560 }
e6a9dde5 1561 g_log<<endl;
976ec823 1562 }
c75a6a9e 1563
37a919d4 1564 if(!dc->d_mdp.d_header.rd) {
c836dc19 1565 sr.setCacheOnly();
37a919d4
RG
1566 }
1567
f26bf547
RG
1568 if (t_pdl) {
1569 t_pdl->prerpz(dq, res);
0a273054
RG
1570 }
1571
16649cb0 1572 // Check if the client has a policy attached to it
99ced1e1 1573 if (wantsRPZ && !appliedPolicy.wasHit()) {
baaed53b
OM
1574
1575 if (luaconfsLocal->dfe.getClientPolicy(dc->d_source, sr.d_discardedPolicies, appliedPolicy)) {
b502d522
RG
1576 mergePolicyTags(dc->d_policyTags, appliedPolicy.getTags());
1577 }
0a273054 1578 }
644dd1da 1579
d4f08082
RG
1580 /* If we already have an answer generated from gettag_ffi, let's see if the filtering policies
1581 should be applied to it */
1582 if (dc->d_rcode != boost::none) {
1583
1584 bool policyOverride = false;
1585 /* Unless we already matched on the client IP, time to check the qname.
1586 We normally check it in beginResolve() but it will be bypassed since we already have an answer */
1587 if (wantsRPZ && appliedPolicy.policyOverridesGettag()) {
1588 if (appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
1589 // Client IP already matched
1590 }
1591 else {
1592 // no match on the client IP, check the qname
1593 if (luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, sr.d_discardedPolicies, appliedPolicy)) {
1594 // got a match
1595 mergePolicyTags(dc->d_policyTags, appliedPolicy.getTags());
1596 }
1597 }
1598
99ced1e1 1599 if (appliedPolicy.wasHit()) {
d4f08082 1600 policyOverride = true;
baaed53b
OM
1601 }
1602 }
644dd1da 1603
995884ff 1604 if (!policyOverride) {
d4f08082 1605 /* No RPZ or gettag overrides it anyway */
96491e6c
OM
1606 ret = std::move(dc->d_records);
1607 res = *dc->d_rcode;
1608 if (res == RCode::NoError && dc->d_followCNAMERecords) {
1609 res = followCNAMERecords(ret, QType(dc->d_mdp.d_qtype));
1610 }
1611 goto haveAnswer;
1612 }
1613 }
1614
54be222b 1615 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
ef3ee606
RG
1616 if (!t_pdl || !t_pdl->preresolve(dq, res)) {
1617
1618 if (!g_dns64PrefixReverse.empty() && dq.qtype == QType::PTR && dq.qname.isPartOf(g_dns64PrefixReverse)) {
1619 res = getFakePTRRecords(dq.qname, ret);
1620 goto haveAnswer;
1621 }
b8470add 1622
30ee601a 1623 sr.setWantsRPZ(wantsRPZ);
acf86ed7 1624
b502d522 1625 if (wantsRPZ && appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
acf86ed7
RG
1626
1627 if (t_pdl && t_pdl->policyHitEventFilter(dc->d_remote, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_tcp, appliedPolicy, dc->d_policyTags, sr.d_discardedPolicies)) {
1628 /* reset to no match */
1629 appliedPolicy = DNSFilterEngine::Policy();
b502d522 1630 }
acf86ed7 1631 else {
2b1309a8 1632 auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw);
acf86ed7
RG
1633 if (policyResult == PolicyResult::HaveAnswer) {
1634 goto haveAnswer;
1635 }
1636 else if (policyResult == PolicyResult::Drop) {
1637 return;
1638 }
b8470add 1639 }
db486de5
PL
1640 }
1641
acf86ed7 1642 // Query did not get handled for Client IP or QNAME Policy reasons, now actually go out to find an answer
44971ca0 1643 try {
124dd1d4 1644 sr.d_appliedPolicy = appliedPolicy;
b502d522 1645 sr.d_policyTags = std::move(dc->d_policyTags);
d6fd3cb8 1646
163ed916
OM
1647 if (!dc->d_routingTag.empty()) {
1648 sr.d_routingTag = dc->d_routingTag;
d6fd3cb8
OM
1649 }
1650
16649cb0 1651 ret.clear(); // policy might have filled it with custom records but we decided not to use them
44971ca0 1652 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1653 shouldNotValidate = sr.wasOutOfBand();
44971ca0 1654 }
d4f08082 1655 catch (const ImmediateQueryDropException& e) {
acf86ed7 1656 // XXX We need to export a protobuf message (and do a NOD lookup) if requested!
d4f08082
RG
1657 g_stats.policyDrops++;
1658 g_log<<Logger::Debug<<"Dropping query because of a filtering policy "<<makeLoginfo(dc)<<endl;
1659 return;
1660 }
1661 catch (const ImmediateServFailException &e) {
124dd1d4 1662 if(g_logCommonErrors) {
e6a9dde5 1663 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
124dd1d4 1664 }
44971ca0
PD
1665 res = RCode::ServFail;
1666 }
d4f08082
RG
1667 catch (const SendTruncatedAnswerException& e) {
1668 ret.clear();
1669 res = RCode::NoError;
1670 pw.getHeader()->tc = 1;
1671 }
1672 catch (const PolicyHitException& e) {
124dd1d4
RG
1673 res = -2;
1674 }
1921a4c2 1675 dq.validationState = sr.getValidationState();
2996400c 1676 appliedPolicy = sr.d_appliedPolicy;
b502d522 1677 dc->d_policyTags = std::move(sr.d_policyTags);
af231ceb
RG
1678 if (appliedPolicy.d_type != DNSFilterEngine::PolicyType::None && appliedPolicy.d_zoneData && appliedPolicy.d_zoneData->d_extendedErrorCode) {
1679 dc->d_extendedErrorCode = *appliedPolicy.d_zoneData->d_extendedErrorCode;
1680 dc->d_extendedErrorExtra = appliedPolicy.d_zoneData->d_extendedErrorExtra;
1681 }
1921a4c2 1682
b8470add
PL
1683 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1684 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
b502d522
RG
1685 if (appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction) {
1686 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1687 }
1688 auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw);
1689 if (policyResult == PolicyResult::HaveAnswer) {
1690 goto haveAnswer;
1691 }
1692 else if (policyResult == PolicyResult::Drop) {
1693 return;
b8470add
PL
1694 }
1695 }
1696
fd870915 1697 if (t_pdl || (g_dns64Prefix && dq.qtype == QType::AAAA && !vStateIsBogus(dq.validationState))) {
ef3ee606
RG
1698 if (res == RCode::NoError) {
1699 auto i = ret.cbegin();
1700 for(; i!= ret.cend(); ++i) {
1701 if (i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER) {
1702 break;
1703 }
1704 }
1705
1706 if (i == ret.cend()) {
1707 /* no record in the answer section, NODATA */
1708 if (t_pdl && t_pdl->nodata(dq, res)) {
1709 shouldNotValidate = true;
1710 }
fd870915 1711 else if (g_dns64Prefix && dq.qtype == QType::AAAA && !vStateIsBogus(dq.validationState)) {
ef3ee606
RG
1712 res = getFakeAAAARecords(dq.qname, *g_dns64Prefix, ret);
1713 shouldNotValidate = true;
1714 }
1715 }
3ca4e735 1716
db486de5 1717 }
ef3ee606 1718 else if(res == RCode::NXDomain && t_pdl && t_pdl->nxdomain(dq, res)) {
3ca4e735 1719 shouldNotValidate = true;
ef3ee606 1720 }
db486de5 1721
ef3ee606 1722 if (t_pdl && t_pdl->postresolve(dq, res)) {
3ca4e735 1723 shouldNotValidate = true;
ef3ee606 1724 }
db486de5 1725 }
4485aa35 1726 }
d4f08082 1727
644dd1da 1728 haveAnswer:;
9cdfab64 1729 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1730 {
85ffbc53
PD
1731 string trace(sr.getTrace());
1732 if(!trace.empty()) {
1733 vector<string> lines;
1734 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1735 for(const string& line : lines) {
85ffbc53 1736 if(!line.empty())
e6a9dde5 1737 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1738 }
1739 }
1740 }
3ddb9247 1741
9cdfab64 1742 if(res == -1) {
0fe1d080
PD
1743 pw.getHeader()->rcode=RCode::ServFail;
1744 // no commit here, because no record
1745 g_stats.servFails++;
1746 }
288f4aa9 1747 else {
ea634573 1748 pw.getHeader()->rcode=res;
92011b8f 1749
f3fe4ae6 1750 // Does the validation mode or query demand validation?
0c43f455 1751 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1752 try {
f3fe4ae6 1753 if(sr.doLog()) {
e6a9dde5 1754 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1755 }
4d2be65d
RG
1756
1757 auto state = sr.getValidationState();
1758
98307d0f 1759 if(state == vState::Secure) {
2e921ec6 1760 if(sr.doLog()) {
e6a9dde5 1761 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1762 }
b25cae9a 1763
1764 // Is the query source interested in the value of the ad-bit?
885c8881 1765 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1766 pw.getHeader()->ad=1;
1767 }
98307d0f 1768 else if(state == vState::Insecure) {
f3fe4ae6 1769 if(sr.doLog()) {
e6a9dde5 1770 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1771 }
b25cae9a 1772
1773 pw.getHeader()->ad=0;
f3fe4ae6 1774 }
fd870915 1775 else if (vStateIsBogus(state)) {
66f2e6ad
KM
1776 if(t_bogusremotes)
1777 t_bogusremotes->push_back(dc->d_source);
1778 if(t_bogusqueryring)
1779 t_bogusqueryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
c87e1876 1780 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1781 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1782 }
1783
1784 // Does the query or validation mode demand sending out a SERVFAIL on validation errors?
885c8881 1785 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1786 if(sr.doLog()) {
e6a9dde5 1787 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1788 }
1789
1790 pw.getHeader()->rcode=RCode::ServFail;
1791 goto sendit;
1792 } else {
1793 if(sr.doLog()) {
e6a9dde5 1794 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1795 }
1796 }
1797 }
1798 }
124dd1d4 1799 catch(const ImmediateServFailException &e) {
b25cae9a 1800 if(g_logCommonErrors)
e6a9dde5 1801 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1802 pw.getHeader()->rcode=RCode::ServFail;
1803 goto sendit;
f3fe4ae6 1804 }
b3f0ed10 1805 }
1806
c154c8a4 1807 if(ret.size()) {
bbec1961 1808 pdns::orderAndShuffle(ret);
5cc8371b 1809 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1810 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1811 variableAnswer=true;
1812 }
8e079f3a 1813 }
0afa32d4
RG
1814
1815 bool needCommit = false;
8e079f3a 1816 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1817 if( ! DNSSECOK &&
1818 ( i->d_type == QType::NSEC3 ||
1819 (
1820 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1821 (
1822 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1823 i->d_place != DNSResourceRecord::ANSWER
1824 )
1825 )
1826 )
1827 ) {
2e921ec6 1828 continue;
3e80ebce
KM
1829 }
1830
70fb28d9 1831 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1832 needCommit = false;
1833 break;
1834 }
1835 needCommit = true;
1836
41c542ec 1837 bool udr = false;
d58249ee 1838#ifdef NOD_ENABLED
41c542ec
NC
1839 if (g_udrEnabled) {
1840 udr = udrCheckUniqueDNSRecord(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
ca2526f5
NC
1841 if (!hasUDR && udr)
1842 hasUDR = true;
41c542ec 1843 }
d58249ee 1844#endif /* NOD ENABLED */
41c542ec 1845
aa7929a3 1846#ifdef HAVE_PROTOBUF
b773359c 1847 if (t_protobufServers) {
83b261a7 1848 pbMessage.addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
aa7929a3
RG
1849 }
1850#endif
ea634573 1851 }
0afa32d4 1852 if(needCommit)
8e079f3a 1853 pw.commit();
288f4aa9 1854 }
10321a98 1855 sendit:;
b3f0ed10 1856
f49ae8cf 1857 if (g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
5a7f99b4 1858 EDNSSubnetOpts eo;
1859 eo.source = dc->d_ednssubnet.source;
1860 ComboAddress sa;
1ef18cab 1861 sa.reset();
5a7f99b4 1862 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1863 eo.scope = Netmask(sa, 0);
94e2a9b0 1864 auto ecsPayload = makeEDNSSubnetOptsString(eo);
5a7f99b4 1865
f49ae8cf
RG
1866 // if we don't have enough space available let's just not set that scope of zero,
1867 // it will prevent some caching, mostly from dnsdist, but that's fine
1868 if (pw.size() < maxanswersize && (maxanswersize - pw.size()) >= (EDNSOptionCodeSize + EDNSOptionLengthSize + ecsPayload.size())) {
94e2a9b0 1869
f49ae8cf
RG
1870 maxanswersize -= EDNSOptionCodeSize + EDNSOptionLengthSize + ecsPayload.size();
1871
1872 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::ECS, std::move(ecsPayload)));
1873 }
5a7f99b4 1874 }
1875
97c6d7e5 1876 if (haveEDNS) {
94e2a9b0 1877 auto state = sr.getValidationState();
699bc10a 1878 if (dc->d_extendedErrorCode || (s_addExtendedResolutionDNSErrors && vStateIsBogus(state))) {
94e2a9b0 1879 EDNSExtendedError::code code;
e95b2a7c 1880 std::string extra;
94e2a9b0 1881
e95b2a7c
RG
1882 if (dc->d_extendedErrorCode) {
1883 code = static_cast<EDNSExtendedError::code>(*dc->d_extendedErrorCode);
1884 extra = std::move(dc->d_extendedErrorExtra);
1885 }
1886 else {
1887 switch (state) {
1888 case vState::BogusNoValidDNSKEY:
1889 code = EDNSExtendedError::code::DNSKEYMissing;
1890 break;
1891 case vState::BogusInvalidDenial:
1892 code = EDNSExtendedError::code::NSECMissing;
1893 break;
1894 case vState::BogusUnableToGetDSs:
1895 code = EDNSExtendedError::code::DNSSECBogus;
1896 break;
1897 case vState::BogusUnableToGetDNSKEYs:
1898 code = EDNSExtendedError::code::DNSKEYMissing;
1899 break;
1900 case vState::BogusSelfSignedDS:
1901 code = EDNSExtendedError::code::DNSSECBogus;
1902 break;
1903 case vState::BogusNoRRSIG:
1904 code = EDNSExtendedError::code::RRSIGsMissing;
1905 break;
1906 case vState::BogusNoValidRRSIG:
1907 code = EDNSExtendedError::code::DNSSECBogus;
1908 break;
1909 case vState::BogusMissingNegativeIndication:
1910 code = EDNSExtendedError::code::NSECMissing;
1911 break;
1912 case vState::BogusSignatureNotYetValid:
1913 code = EDNSExtendedError::code::SignatureNotYetValid;
1914 break;
1915 case vState::BogusSignatureExpired:
1916 code = EDNSExtendedError::code::SignatureExpired;
1917 break;
1918 case vState::BogusUnsupportedDNSKEYAlgo:
1919 code = EDNSExtendedError::code::UnsupportedDNSKEYAlgorithm;
1920 break;
1921 case vState::BogusUnsupportedDSDigestType:
1922 code = EDNSExtendedError::code::UnsupportedDSDigestType;
1923 break;
1924 case vState::BogusNoZoneKeyBitSet:
1925 code = EDNSExtendedError::code::NoZoneKeyBitSet;
1926 break;
1927 case vState::BogusRevokedDNSKEY:
1928 code = EDNSExtendedError::code::DNSSECBogus;
1929 break;
1930 case vState::BogusInvalidDNSKEYProtocol:
1931 code = EDNSExtendedError::code::DNSSECBogus;
1932 break;
1933 default:
1934 throw std::runtime_error("Bogus validation state not handled: " + vStateToString(state));
1935 }
94e2a9b0
RG
1936 }
1937
1938 EDNSExtendedError eee;
1939 eee.infoCode = static_cast<uint16_t>(code);
e95b2a7c
RG
1940 eee.extraText = std::move(extra);
1941
f49ae8cf 1942 if (pw.size() < maxanswersize && (maxanswersize - pw.size()) >= (EDNSOptionCodeSize + EDNSOptionLengthSize + sizeof(eee.infoCode) + eee.extraText.size())) {
e95b2a7c
RG
1943 returnedEdnsOptions.push_back(make_pair(EDNSOptionCode::EXTENDEDERROR, makeEDNSExtendedErrorOptString(eee)));
1944 }
94e2a9b0
RG
1945 }
1946
97c6d7e5
RG
1947 /* we try to add the EDNS OPT RR even for truncated answers,
1948 as rfc6891 states:
1949 "The minimal response MUST be the DNS header, question section, and an
1950 OPT record. This MUST also occur when a truncated response (using
1951 the DNS header's TC bit) is returned."
1952 */
9b60fb71 1953 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1f691b94 1954 pw.commit();
97c6d7e5
RG
1955 }
1956
79332bff 1957 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1958 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ff4d391d
NC
1959#ifdef NOD_ENABLED
1960 bool nod = false;
1961 if (g_nodEnabled) {
3bd27e4a 1962 if (nodCheckNewDomain(dc->d_mdp.d_qname)) {
ff4d391d 1963 nod = true;
3bd27e4a 1964 }
ff4d391d
NC
1965 }
1966#endif /* NOD_ENABLED */
aa7929a3 1967#ifdef HAVE_PROTOBUF
b502d522 1968 if (t_protobufServers && !(luaconfsLocal->protobufExportConfig.taggedOnly && appliedPolicy.getName().empty() && dc->d_policyTags.empty())) {
a44a8d66 1969 // Start constructing embedded DNSResponse object
83b261a7 1970 pbMessage.setResponseCode(pw.getHeader()->rcode);
b502d522 1971 if (!appliedPolicy.getName().empty()) {
83b261a7
OM
1972 pbMessage.setAppliedPolicy(appliedPolicy.getName());
1973 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
1974 pbMessage.setAppliedPolicyTrigger(appliedPolicy.d_trigger);
1975 pbMessage.setAppliedPolicyHit(appliedPolicy.d_hit);
0a273054 1976 }
83b261a7
OM
1977 pbMessage.addPolicyTags(dc->d_policyTags);
1978 pbMessage.setInBytes(packet.size());
a44a8d66 1979
47a6825e 1980 // Take a snapshot of the current protobuf buffer state to store in the packet cache
ac10822e 1981 pbDataForCache = boost::make_optional(RecursorPacketCache::PBData{
83b261a7
OM
1982 pbMessage.getMessageBuf(),
1983 pbMessage.getResponseBuf(),
ac10822e 1984 !appliedPolicy.getName().empty() || !dc->d_policyTags.empty()});
47a6825e 1985#ifdef NOD_ENABLED
d78bcadc 1986 // if (g_udrEnabled) ??
83b261a7 1987 pbMessage.clearUDR(pbDataForCache->d_response);
47a6825e 1988#endif
ac10822e 1989 // Below are the fields that are not stored in the packet cache and will be appended here and on a cache hit
c29d820c 1990 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
83b261a7 1991 pbMessage.setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
c29d820c
RG
1992 }
1993 else {
83b261a7 1994 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
c29d820c 1995 }
83b261a7
OM
1996 pbMessage.setMessageIdentity(dc->d_uuid);
1997 pbMessage.setSocketFamily(dc->d_source.sin4.sin_family);
1998 pbMessage.setSocketProtocol(dc->d_tcp);
a44a8d66
OM
1999 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2000 ComboAddress requestor = requestorNM.getMaskedNetwork();
83b261a7
OM
2001 pbMessage.setFrom(requestor);
2002 pbMessage.setTo(dc->d_destination);
2003 pbMessage.setId(dc->d_mdp.d_header.id);
2004
2005 pbMessage.setTime();
2006 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2007 pbMessage.setRequestorId(dq.requestorId);
2008 pbMessage.setDeviceId(dq.deviceId);
2009 pbMessage.setDeviceName(dq.deviceName);
2010 pbMessage.setFromPort(dc->d_source.getPort());
2011 pbMessage.setToPort(dc->d_destination.getPort());
41c542ec
NC
2012#ifdef NOD_ENABLED
2013 if (g_nodEnabled) {
ca2526f5 2014 if (nod) {
83b261a7
OM
2015 pbMessage.setNewlyObservedDomain(true);
2016 pbMessage.addPolicyTag(g_nod_pbtag);
ca2526f5
NC
2017 }
2018 if (hasUDR) {
83b261a7 2019 pbMessage.addPolicyTag(g_udr_pbtag);
ca2526f5 2020 }
41c542ec
NC
2021 }
2022#endif /* NOD_ENABLED */
406b722e 2023 if (dc->d_logResponse) {
83b261a7 2024 protobufLogResponse(pbMessage);
406b722e 2025 }
aa7929a3 2026 }
47a6825e 2027#endif /* HAVE_PROTOBUF */
ea634573 2028 if(!dc->d_tcp) {
b71b60ee 2029 struct msghdr msgh;
2030 struct iovec iov;
7bec330a
OM
2031 cmsgbuf_aligned cbuf;
2032 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
2033 msgh.msg_control=NULL;
2034
cbc03320 2035 if(g_fromtosockets.count(dc->d_socket)) {
4272d071 2036 addCMsgSrcAddr(&msgh, &cbuf, &dc->d_local, 0);
2c0af54f 2037 }
2ea1d2c0
OM
2038 int sendErr = sendOnNBSocket(dc->d_socket, &msgh);
2039 if (sendErr && g_logCommonErrors) {
a2a81d42 2040 g_log << Logger::Warning << "Sending UDP reply to client " << dc->getRemote() << " failed with: "
2ea1d2c0 2041 << strerror(sendErr) << endl;
a2a81d42 2042 }
70fb28d9 2043
49dc532e 2044 if(variableAnswer || sr.wasVariable()) {
1ef18cab 2045 g_stats.variableResponses++;
49dc532e 2046 }
3762e821 2047 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
b5e675a7 2048 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 2049 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 2050 g_now.tv_sec,
76e2b9e3 2051 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 2052 min(minTTL,SyncRes::s_packetcachettl),
88694a6a 2053 dq.validationState,
9e6128d0 2054 std::move(pbDataForCache));
1051f8a9 2055 }
feccc9fc 2056 }
9c495589 2057 else {
28647bcf 2058 bool hadError = sendResponseOverTCP(dc, packet);
feccc9fc 2059
b5b94beb 2060 // update tcp connection status, closing if needed and doing the fd multiplexer accounting
c51c551e
OM
2061 if (dc->d_tcpConnection->d_requestsInFlight > 0) {
2062 dc->d_tcpConnection->d_requestsInFlight--;
2063 }
3ddb9247 2064
b5b94beb
OM
2065 // In the code below, we try to remove the fd from the set, but
2066 // we don't know if another mthread already did the remove, so we can get a
2067 // "Tried to remove unlisted fd" exception. Note that an inflight < limit test
2068 // will not work since we do not know if the other mthread got an error or not.
09e6702a 2069 if(hadError) {
67552030 2070 terminateTCPConnection(dc->d_socket);
c36bc97a 2071 dc->d_socket = -1;
09e6702a 2072 }
a6ae6414 2073 else {
fde296a3
RG
2074 dc->d_tcpConnection->queriesCount++;
2075 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
b5b94beb
OM
2076 try {
2077 t_fdm->removeReadFD(dc->d_socket);
2078 }
2079 catch (FDMultiplexerException &) {
2080 }
fde296a3
RG
2081 dc->d_socket = -1;
2082 }
2083 else {
fde296a3 2084 Utility::gettimeofday(&g_now, 0); // needs to be updated
27ae2e3c 2085 struct timeval ttd = g_now;
c51c551e
OM
2086 // If we cross from max to max-1 in flight requests, the fd was not listened to, add it back
2087 if (dc->d_tcpConnection->d_requestsInFlight == TCPConnection::s_maxInFlight - 1) {
3cabb750
OM
2088 // A read error might have happened. If we add the fd back, it will most likely error again.
2089 // This is not a big issue, the next handleTCPClientReadable() will see another read error
2090 // and take action.
d5c6ec95
OM
2091 ttd.tv_sec += g_tcpTimeout;
2092 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
2093 } else {
b05ef1b4 2094 // fd might have been removed by read error code, or a read timeout, so expect an exception
3cabb750
OM
2095 try {
2096 t_fdm->setReadTTD(dc->d_socket, ttd, g_tcpTimeout);
2097 }
b05ef1b4 2098 catch (const FDMultiplexerException &) {
ebcde9cb 2099 // but if the FD was removed because of a timeout while we were sending a response,
b05ef1b4
RG
2100 // we need to re-arm it. If it was an error it will error again.
2101 ttd.tv_sec += g_tcpTimeout;
2102 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection, &ttd);
3cabb750 2103 }
d5c6ec95 2104 }
fde296a3 2105 }
0e9d9ce2 2106 }
9c495589 2107 }
3bd27e4a 2108
2c9119cd 2109 float spent=makeFloat(sr.getNow()-dc->d_now);
3bd27e4a 2110 if (!g_quiet) {
e6a9dde5
PL
2111 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
2112 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 2113 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
2114 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
2115
2116 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
5294e503 2117 g_log<< ", dnssec="<<sr.getValidationState();
2c9119cd 2118 }
e6a9dde5 2119 g_log<<endl;
c75a6a9e 2120 }
b23b8614 2121
f7b8cffa 2122 if (sr.d_outqueries || sr.d_authzonequeries) {
ccfadb6c 2123 g_recCache->cacheMisses++;
f7b8cffa
RG
2124 }
2125 else {
ccfadb6c 2126 g_recCache->cacheHits++;
f7b8cffa 2127 }
2c9119cd 2128
fe213470
BH
2129 if(spent < 0.001)
2130 g_stats.answers0_1++;
2131 else if(spent < 0.010)
2132 g_stats.answers1_10++;
2133 else if(spent < 0.1)
2134 g_stats.answers10_100++;
2135 else if(spent < 1.0)
2136 g_stats.answers100_1000++;
2137 else
2138 g_stats.answersSlow++;
2139
574af7ea 2140 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 2141 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 2142 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 2143 // no worries, we do this for packet cache hits elsewhere
19178da9 2144
2145 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
2146 if(ourtime < 1)
2147 g_stats.ourtime0_1++;
2148 else if(ourtime < 2)
2149 g_stats.ourtime1_2++;
2150 else if(ourtime < 4)
2151 g_stats.ourtime2_4++;
2152 else if(ourtime < 8)
2153 g_stats.ourtime4_8++;
2154 else if(ourtime < 16)
2155 g_stats.ourtime8_16++;
2156 else if(ourtime < 32)
2157 g_stats.ourtime16_32++;
2158 else {
2159 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
2160 g_stats.ourtimeSlow++;
2161 }
042da1a1 2162 if(ourtime >= 0.0) {
2163 newLat=ourtime*1000; // usec
2164 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
2165 }
3bd27e4a
RG
2166
2167#ifdef NOD_ENABLED
2168 if (nod) {
2169 sendNODLookup(dc->d_mdp.d_qname);
2170 }
2171#endif /* NOD_ENABLED */
2172
c6d04bdc 2173 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
288f4aa9 2174 }
d4f08082 2175 catch (const PDNSException &ae) {
e6a9dde5 2176 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
288f4aa9 2177 }
d4f08082 2178 catch (const MOADNSException &mde) {
16ce7f18 2179 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<mde.what()<<endl;
7b1469bb 2180 }
d4f08082 2181 catch (const std::exception& e) {
e6a9dde5 2182 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
2183
2184 // Luawrapper nests the exception from Lua, so we unnest it here
2185 try {
2186 std::rethrow_if_nested(e);
2010ac95 2187 } catch(const std::exception& ne) {
e6a9dde5 2188 g_log<<". Extra info: "<<ne.what();
068c7634
PD
2189 } catch(...) {}
2190
e6a9dde5 2191 g_log<<endl;
c154c8a4 2192 }
288f4aa9 2193 catch(...) {
e6a9dde5 2194 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 2195 }
3ddb9247 2196
ec6eacbc 2197 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
2198}
2199
d187038c 2200static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 2201{
2d733c0f 2202 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 2203 if(processNum >= 0)
335da0ba 2204 sockname += "."+std::to_string(processNum);
677e2a46 2205 sockname+=".controlsocket";
41f7a068 2206 s_rcc.listen(sockname);
3ddb9247 2207
387de317
BH
2208 int sockowner = -1;
2209 int sockgroup = -1;
2210
2211 if (!::arg().isEmpty("socket-group"))
2212 sockgroup=::arg().asGid("socket-group");
2213 if (!::arg().isEmpty("socket-owner"))
2214 sockowner=::arg().asUid("socket-owner");
3ddb9247 2215
f838ad8d
BH
2216 if (sockgroup > -1 || sockowner > -1) {
2217 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
2218 unixDie("Failed to chown control socket");
2219 }
2220 }
387de317
BH
2221
2222 // do mode change if socket-mode is given
2223 if(!::arg().isEmpty("socket-mode")) {
2224 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
2225 if(chmod(sockname.c_str(), sockmode) < 0) {
2226 unixDie("Failed to chmod control socket");
2227 }
387de317 2228 }
1d5b3ce6
BH
2229}
2230
5cc8371b 2231static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
29e6303a 2232 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options,
5cc8371b 2233 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 2234{
59cb4a79 2235 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
2236 const bool lookForECS = ednssubnet != nullptr;
2237 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
2238 size_t questionLen = question.length();
2239 unsigned int consumed=0;
2240 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
2241
2242 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
2243 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
2244 const uint16_t arcount = ntohs(dh->arcount);
2245
2246 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
2247 if (question.at(pos) != 0) {
2248 /* not an OPT or a XPF, bye. */
2249 return;
2250 }
2251
2252 pos += 1;
2253 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
2254 pos += sizeof(dnsrecordheader);
2255
2256 if (pos >= questionLen) {
2257 return;
2258 }
2259
02b47f43 2260 /* OPT root label (1) followed by type (2) */
5cc8371b 2261 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
2262 if (!options) {
2263 char* ecsStart = nullptr;
2264 size_t ecsLen = 0;
5cc8371b
RG
2265 /* we need to pass the record len */
2266 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
2267 if (res == 0 && ecsLen > 4) {
2268 EDNSSubnetOpts eso;
2269 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
2270 *ednssubnet=eso;
5cc8371b 2271 foundECS = true;
00b8cadc
RG
2272 }
2273 }
2274 }
2275 else {
5cc8371b
RG
2276 /* we need to pass the record len */
2277 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
2278 if (res == 0) {
2279 const auto& it = options->find(EDNSOptionCode::ECS);
29e6303a 2280 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
00b8cadc 2281 EDNSSubnetOpts eso;
29e6303a 2282 if(getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
00b8cadc 2283 *ednssubnet=eso;
5cc8371b 2284 foundECS = true;
00b8cadc
RG
2285 }
2286 }
02b47f43
RG
2287 }
2288 }
2289 }
59cb4a79 2290 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
2291 if ((questionLen - pos) < ntohs(drh->d_clen)) {
2292 return;
2293 }
2294
2295 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
2296 }
2297
2298 pos += ntohs(drh->d_clen);
02b47f43
RG
2299 }
2300}
2301
5216ddcc
RG
2302static bool handleTCPReadResult(int fd, ssize_t bytes)
2303{
2304 if (bytes == 0) {
2305 /* EOF */
67552030 2306 terminateTCPConnection(fd);
5216ddcc
RG
2307 return false;
2308 }
2309 else if (bytes < 0) {
2310 if (errno != EAGAIN && errno != EWOULDBLOCK) {
67552030 2311 terminateTCPConnection(fd);
5216ddcc
RG
2312 return false;
2313 }
2314 }
2315
2316 return true;
2317}
2318
d187038c 2319static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2320{
dc593046 2321 shared_ptr<TCPConnection> conn=boost::any_cast<shared_ptr<TCPConnection> >(var);
c038218b 2322
5216ddcc
RG
2323 if (conn->state == TCPConnection::PROXYPROTOCOLHEADER) {
2324 ssize_t bytes = recv(conn->getFD(), &conn->data.at(conn->proxyProtocolGot), conn->proxyProtocolNeed, 0);
2325 if (bytes <= 0) {
2326 handleTCPReadResult(fd, bytes);
2327 return;
2328 }
2329
2330 conn->proxyProtocolGot += bytes;
2331 conn->data.resize(conn->proxyProtocolGot);
2332 ssize_t remaining = isProxyHeaderComplete(conn->data);
2333 if (remaining == 0) {
3bdc4508
RG
2334 if (g_logCommonErrors) {
2335 g_log<<Logger::Error<<"Unable to consume proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
2336 }
5216ddcc 2337 ++g_stats.proxyProtocolInvalidCount;
67552030 2338 terminateTCPConnection(fd);
5216ddcc
RG
2339 return;
2340 }
2341 else if (remaining < 0) {
2342 conn->proxyProtocolNeed = -remaining;
2343 conn->data.resize(conn->proxyProtocolGot + conn->proxyProtocolNeed);
2344 return;
2345 }
2346 else {
2347 /* proxy header received */
2348 /* we ignore the TCP field for now, but we could properly set whether
0dd02171 2349 the connection was received over UDP or TCP if needed */
5216ddcc 2350 bool tcp;
8c73c703 2351 bool proxy = false;
95f851d6
RG
2352 size_t used = parseProxyHeader(conn->data, proxy, conn->d_source, conn->d_destination, tcp, conn->proxyProtocolValues);
2353 if (used <= 0) {
3bdc4508
RG
2354 if (g_logCommonErrors) {
2355 g_log<<Logger::Error<<"Unable to parse proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
2356 }
2357 ++g_stats.proxyProtocolInvalidCount;
67552030 2358 terminateTCPConnection(fd);
5216ddcc
RG
2359 return;
2360 }
95f851d6
RG
2361 else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
2362 if (g_logCommonErrors) {
2363 g_log<<Logger::Error<<"Proxy protocol header in packet from TCP client "<< conn->d_remote.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping"<< endl;
2364 }
2365 ++g_stats.proxyProtocolInvalidCount;
67552030 2366 terminateTCPConnection(fd);
95f851d6
RG
2367 return;
2368 }
38d8b937 2369
a4888b73
RG
2370 /* Now that we have retrieved the address of the client, as advertised by the proxy
2371 via the proxy protocol header, check that it is allowed by our ACL */
8c73c703 2372 /* note that if the proxy header used a 'LOCAL' command, the original source and destination are untouched so everything should be fine */
38d8b937
RG
2373 if (t_allowFrom && !t_allowFrom->match(&conn->d_source)) {
2374 if (!g_quiet) {
2375 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<conn->d_source.toString()<<", address not matched by allow-from"<<endl;
2376 }
2377
2378 ++g_stats.unauthorizedTCP;
67552030 2379 terminateTCPConnection(fd);
38d8b937
RG
2380 return;
2381 }
2382
5216ddcc
RG
2383 conn->data.resize(2);
2384 conn->state = TCPConnection::BYTE0;
2385 }
2386 }
2387
2388 if (conn->state==TCPConnection::BYTE0) {
2749c3fe 2389 ssize_t bytes=recv(conn->getFD(), &conn->data[0], 2, 0);
09e6702a 2390 if(bytes==1)
667f7e60 2391 conn->state=TCPConnection::BYTE1;
3ddb9247 2392 if(bytes==2) {
a0aa4f64 2393 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 2394 conn->data.resize(conn->qlen);
667f7e60
BH
2395 conn->bytesread=0;
2396 conn->state=TCPConnection::GETQUESTION;
09e6702a 2397 }
a4888b73 2398 if (bytes <= 0) {
5216ddcc 2399 handleTCPReadResult(fd, bytes);
09e6702a
BH
2400 return;
2401 }
2402 }
5216ddcc
RG
2403
2404 if (conn->state==TCPConnection::BYTE1) {
2749c3fe 2405 ssize_t bytes=recv(conn->getFD(), &conn->data[1], 1, 0);
09e6702a 2406 if(bytes==1) {
667f7e60 2407 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 2408 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
2749c3fe 2409 conn->data.resize(conn->qlen);
667f7e60 2410 conn->bytesread=0;
09e6702a 2411 }
a4888b73 2412 if (bytes <= 0) {
5216ddcc
RG
2413 if (!handleTCPReadResult(fd, bytes)) {
2414 if(g_logCommonErrors) {
2415 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
2416 }
2417 }
09e6702a
BH
2418 return;
2419 }
2420 }
5216ddcc
RG
2421
2422 if(conn->state==TCPConnection::GETQUESTION) {
2749c3fe 2423 ssize_t bytes=recv(conn->getFD(), &conn->data[conn->bytesread], conn->qlen - conn->bytesread, 0);
5216ddcc
RG
2424 if (bytes <= 0) {
2425 if (!handleTCPReadResult(fd, bytes)) {
2426 if(g_logCommonErrors) {
2427 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
2428 }
2429 }
2430 return;
2431 }
2432 else if (bytes > std::numeric_limits<std::uint16_t>::max()) {
c0f9be19 2433 if(g_logCommonErrors) {
5216ddcc 2434 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" sent an invalid question size while reading question body"<<endl;
c0f9be19 2435 }
67552030 2436 terminateTCPConnection(fd);
09e6702a
BH
2437 return;
2438 }
b841314c 2439 conn->bytesread+=(uint16_t)bytes;
667f7e60 2440 if(conn->bytesread==conn->qlen) {
87ff2287 2441 conn->state = TCPConnection::BYTE0;
9a864da4 2442 std::unique_ptr<DNSComboWriter> dc;
09e6702a 2443 try {
9a864da4 2444 dc=std::unique_ptr<DNSComboWriter>(new DNSComboWriter(conn->data, g_now));
09e6702a 2445 }
16ce7f18 2446 catch(const MOADNSException &mde) {
3ddb9247 2447 g_stats.clientParseError++;
67552030 2448 if (g_logCommonErrors) {
e6a9dde5 2449 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
67552030
RG
2450 }
2451 terminateTCPConnection(fd);
4957a608 2452 return;
09e6702a 2453 }
cd989c87
BH
2454 dc->d_tcpConnection = conn; // carry the torch
2455 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 2456 dc->d_tcp=true;
5cc8371b 2457 dc->setRemote(conn->d_remote);
5216ddcc 2458 dc->setSource(conn->d_source);
a6147cd2 2459 ComboAddress dest;
d38e2ba9 2460 dest.reset();
a6147cd2 2461 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
2462 socklen_t len = dest.getSocklen();
2463 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
2464 dc->setLocal(dest);
5216ddcc 2465 dc->setDestination(conn->d_destination);
f00de7d2
RG
2466 /* we can't move this if we want to be able to access the values in
2467 all queries sent over this connection */
2468 dc->d_proxyProtocolValues = conn->proxyProtocolValues;
33dcceba
RG
2469 DNSName qname;
2470 uint16_t qtype=0;
2471 uint16_t qclass=0;
2472 bool needECS = false;
5cc8371b 2473 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 2474 string requestorId;
590388d2 2475 string deviceId;
0a6a45c8 2476 string deviceName;
16bbc6e3 2477 bool logQuery = false;
aa7929a3 2478#ifdef HAVE_PROTOBUF
02b47f43 2479 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2480 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
2481 needECS = true;
2482 }
b773359c 2483 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
406b722e 2484 dc->d_logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2485#endif /* HAVE_PROTOBUF */
2486
2487#ifdef HAVE_FSTRM
2488 checkFrameStreamExport(luaconfsLocal);
33dcceba
RG
2489#endif
2490
70fb28d9 2491 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
2492
2493 try {
29e6303a 2494 EDNSOptionViewMap ednsOptions;
5cc8371b 2495 bool xpfFound = false;
b40562da 2496 dc->d_ecsParsed = true;
5cc8371b 2497 dc->d_ecsFound = false;
2749c3fe 2498 getQNameAndSubnet(conn->data, &qname, &qtype, &qclass,
5cc8371b
RG
2499 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2500 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 2501
70fb28d9 2502 if(t_pdl) {
33dcceba 2503 try {
70fb28d9 2504 if (t_pdl->d_gettag_ffi) {
cca6d6a8
RG
2505 RecursorLua4::FFIParams params(qname, qtype, dc->d_destination, dc->d_source, dc->d_ednssubnet.source, dc->d_data, dc->d_policyTags, dc->d_records, ednsOptions, dc->d_proxyProtocolValues, requestorId, deviceId, deviceName, dc->d_routingTag, dc->d_rcode, dc->d_ttlCap, dc->d_variable, true, logQuery, dc->d_logResponse, dc->d_followCNAMERecords, dc->d_extendedErrorCode, dc->d_extendedErrorExtra);
2506 dc->d_tag = t_pdl->gettag_ffi(params);
70fb28d9
RG
2507 }
2508 else if (t_pdl->d_gettag) {
163ed916 2509 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, deviceName, dc->d_routingTag, dc->d_proxyProtocolValues);
70fb28d9 2510 }
33dcceba 2511 }
70fb28d9 2512 catch(const std::exception& e) {
67552030 2513 if(g_logCommonErrors) {
e6a9dde5 2514 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
67552030 2515 }
33dcceba
RG
2516 }
2517 }
2518 }
70fb28d9 2519 catch(const std::exception& e)
33dcceba 2520 {
67552030 2521 if (g_logCommonErrors) {
e6a9dde5 2522 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
67552030 2523 }
33dcceba
RG
2524 }
2525 }
f52177c3
RG
2526
2527 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(&conn->data[0]);
2528
33dcceba 2529#ifdef HAVE_PROTOBUF
b773359c 2530 if(t_protobufServers || t_outgoingProtobufServers) {
67e31ebe 2531 dc->d_requestorId = requestorId;
590388d2 2532 dc->d_deviceId = deviceId;
0a6a45c8 2533 dc->d_deviceName = deviceName;
d61aa945 2534 dc->d_uuid = getUniqueID();
4898a348 2535 }
02b47f43 2536
b773359c 2537 if(t_protobufServers) {
02b47f43 2538 try {
02b47f43 2539
845cbf4c 2540 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && dc->d_policyTags.empty())) {
0a6a45c8 2541 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId, dc->d_deviceName);
b790ef3d 2542 }
02b47f43 2543 }
67552030
RG
2544 catch (const std::exception& e) {
2545 if (g_logCommonErrors) {
e6a9dde5 2546 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
67552030 2547 }
02b47f43
RG
2548 }
2549 }
aa7929a3 2550#endif
67552030
RG
2551 if (t_pdl) {
2552 if (t_pdl->ipfilter(dc->d_source, dc->d_destination, *dh)) {
2553 if (!g_quiet) {
5034517a 2554 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED TCP question from "<<dc->d_source.toStringWithPort()<<(dc->d_source != dc->d_remote ? " (via "+dc->d_remote.toStringWithPort()+")" : "")<<" based on policy"<<endl;
67552030 2555 }
5034517a 2556 g_stats.policyDrops++;
67552030 2557 terminateTCPConnection(fd);
5034517a
RG
2558 return;
2559 }
2560 }
2561
67552030 2562 if (dc->d_mdp.d_header.qr) {
048f5db6 2563 g_stats.ignoredCount++;
67552030 2564 if (g_logCommonErrors) {
c0f9be19
RG
2565 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2566 }
67552030 2567 terminateTCPConnection(fd);
4957a608 2568 return;
879b3f70 2569 }
67552030 2570 if (dc->d_mdp.d_header.opcode) {
048f5db6 2571 g_stats.ignoredCount++;
67552030 2572 if (g_logCommonErrors) {
c0f9be19
RG
2573 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
2574 }
28647bcf 2575 sendErrorOverTCP(dc, RCode::NotImp);
c0f9be19
RG
2576 return;
2577 }
2578 else if (dh->qdcount == 0) {
2579 g_stats.emptyQueriesCount++;
67552030 2580 if (g_logCommonErrors) {
c0f9be19
RG
2581 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<< dc->getRemote() <<" on server socket!"<<endl;
2582 }
28647bcf 2583 sendErrorOverTCP(dc, RCode::NotImp);
3abcdab2
PD
2584 return;
2585 }
09e6702a 2586 else {
4957a608
BH
2587 ++g_stats.qcounter;
2588 ++g_stats.tcpqcounter;
87ff2287 2589 ++conn->d_requestsInFlight;
c51c551e 2590 if (conn->d_requestsInFlight >= TCPConnection::s_maxInFlight) {
87ff2287
OM
2591 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
2592 } else {
2593 Utility::gettimeofday(&g_now, 0); // needed?
2594 struct timeval ttd = g_now;
2595 t_fdm->setReadTTD(fd, ttd, g_tcpTimeout);
2596 }
d5c6ec95 2597 MT->makeThread(startDoResolve, dc.release()); // deletes dc
4957a608 2598 return;
09e6702a
BH
2599 }
2600 }
2601 }
2602}
2603
5216ddcc
RG
2604static bool expectProxyProtocol(const ComboAddress& from)
2605{
2606 return g_proxyProtocolACL.match(from);
2607}
2608
6dcd28c3 2609//! Handle new incoming TCP connection
d187038c 2610static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 2611{
37d3f960 2612 ComboAddress addr;
09e6702a 2613 socklen_t addrlen=sizeof(addr);
a683e8bd 2614 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 2615 if(newsock>=0) {
85c32340
BH
2616 if(MT->numProcesses() > g_maxMThreads) {
2617 g_stats.overCapacityDrops++;
a7b68ae7
RG
2618 try {
2619 closesocket(newsock);
2620 }
2621 catch(const PDNSException& e) {
e6a9dde5 2622 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 2623 }
85c32340
BH
2624 return;
2625 }
2626
38d8b937 2627 if(t_remotes) {
92011b8f 2628 t_remotes->push_back(addr);
38d8b937
RG
2629 }
2630
2631 bool fromProxyProtocolSource = expectProxyProtocol(addr);
2632 if(t_allowFrom && !t_allowFrom->match(&addr) && !fromProxyProtocolSource) {
3ddb9247 2633 if(!g_quiet)
38d8b937 2634 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address neither matched by allow-from nor proxy-protocol-from"<<endl;
2914b022 2635
09e6702a 2636 g_stats.unauthorizedTCP++;
a7b68ae7
RG
2637 try {
2638 closesocket(newsock);
2639 }
2640 catch(const PDNSException& e) {
e6a9dde5 2641 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 2642 }
09e6702a
BH
2643 return;
2644 }
38d8b937 2645
bd0289fc 2646 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 2647 g_stats.tcpClientOverflow++;
a7b68ae7
RG
2648 try {
2649 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2650 }
2651 catch(const PDNSException& e) {
e6a9dde5 2652 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 2653 }
09e6702a
BH
2654 return;
2655 }
3ddb9247 2656
3897b9e1 2657 setNonBlocking(newsock);
f26bf547 2658 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
5216ddcc
RG
2659 tc->d_source = addr;
2660 tc->d_destination.reset();
2661 tc->d_destination.sin4.sin_family = addr.sin4.sin_family;
2662 socklen_t len = tc->d_destination.getSocklen();
2663 getsockname(tc->getFD(), reinterpret_cast<sockaddr*>(&tc->d_destination), &len); // if this fails, we're ok with it
2664
38d8b937 2665 if (fromProxyProtocolSource) {
5216ddcc
RG
2666 tc->proxyProtocolNeed = s_proxyProtocolMinimumHeaderSize;
2667 tc->data.resize(tc->proxyProtocolNeed);
2668 tc->state = TCPConnection::PROXYPROTOCOLHEADER;
2669 }
2670 else {
2671 tc->state = TCPConnection::BYTE0;
2672 }
3ddb9247 2673
27ae2e3c
RG
2674 struct timeval ttd;
2675 Utility::gettimeofday(&ttd, 0);
2676 ttd.tv_sec += g_tcpTimeout;
c038218b 2677
27ae2e3c 2678 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc, &ttd);
09e6702a
BH
2679 }
2680}
3ddb9247 2681
5216ddcc 2682static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, ComboAddress source, ComboAddress destination, struct timeval tv, int fd, std::vector<ProxyProtocolValue>& proxyProtocolValues)
1bc3c142 2683{
183eb877 2684 gettimeofday(&g_now, 0);
c29d820c
RG
2685 if (tv.tv_sec) {
2686 struct timeval diff = g_now - tv;
2687 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 2688
c29d820c
RG
2689 if(delta > 1000.0) {
2690 g_stats.tooOldDrops++;
2691 return nullptr;
2692 }
b71b60ee 2693 }
2694
1bc3c142 2695 ++g_stats.qcounter;
d7f10541
BH
2696 if(fromaddr.sin4.sin_family==AF_INET6)
2697 g_stats.ipv6qcounter++;
1bc3c142
BH
2698
2699 string response;
93f0da94 2700 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 2701 unsigned int ctag=0;
f57486f1 2702 uint32_t qhash = 0;
12aff2e5 2703 bool needECS = false;
5cc8371b 2704 bool needXPF = g_XPFAcl.match(fromaddr);
b502d522 2705 std::unordered_set<std::string> policyTags;
5fd2577f 2706 LuaContext::LuaObject data;
67e31ebe 2707 string requestorId;
590388d2 2708 string deviceId;
0a6a45c8 2709 string deviceName;
163ed916 2710 string routingTag;
16bbc6e3 2711 bool logQuery = false;
406b722e 2712 bool logResponse = false;
12aff2e5 2713#ifdef HAVE_PROTOBUF
02b47f43 2714 boost::uuids::uuid uniqueId;
02b47f43 2715 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 2716 if (checkProtobufExport(luaconfsLocal)) {
d61aa945 2717 uniqueId = getUniqueID();
02b47f43 2718 needECS = true;
63341e8d 2719 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
d61aa945 2720 uniqueId = getUniqueID();
02b47f43 2721 }
b773359c 2722 logQuery = t_protobufServers && luaconfsLocal->protobufExportConfig.logQueries;
406b722e 2723 logResponse = t_protobufServers && luaconfsLocal->protobufExportConfig.logResponses;
b9fa43e0
OM
2724#endif
2725#ifdef HAVE_FSTRM
2726 checkFrameStreamExport(luaconfsLocal);
12aff2e5 2727#endif
b40562da
RG
2728 EDNSSubnetOpts ednssubnet;
2729 bool ecsFound = false;
2730 bool ecsParsed = false;
37a919d4 2731 std::vector<DNSRecord> records;
e95b2a7c 2732 std::string extendedErrorExtra;
37a919d4 2733 boost::optional<int> rcode = boost::none;
e95b2a7c 2734 boost::optional<uint16_t> extendedErrorCode{boost::none};
70fb28d9
RG
2735 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2736 bool variable = false;
37a919d4 2737 bool followCNAMEs = false;
1bc3c142 2738 try {
02b47f43
RG
2739 DNSName qname;
2740 uint16_t qtype=0;
2741 uint16_t qclass=0;
1bc3c142 2742 uint32_t age;
c15ff3df 2743 bool qnameParsed=false;
8f7473d7 2744#ifdef MALLOC_TRACE
2745 /*
2746 static uint64_t last=0;
2747 if(!last)
2748 g_mtracer->clearAllocators();
2749 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2750 last=g_mtracer->getAllocs();
2751 cout<<g_mtracer->topAllocatorsString()<<endl;
2752 g_mtracer->clearAllocators();
2753 */
2754#endif
55a1378f 2755
70fb28d9 2756 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 2757 try {
29e6303a 2758 EDNSOptionViewMap ednsOptions;
5cc8371b
RG
2759 bool xpfFound = false;
2760
2761 ecsFound = false;
2762
2763 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2764 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
2765 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
2766
c15ff3df
RG
2767 qnameParsed = true;
2768 ecsParsed = true;
12aff2e5 2769
70fb28d9 2770 if(t_pdl) {
12aff2e5 2771 try {
70fb28d9 2772 if (t_pdl->d_gettag_ffi) {
cca6d6a8
RG
2773 RecursorLua4::FFIParams params(qname, qtype, destination, source, ednssubnet.source, data, policyTags, records, ednsOptions, proxyProtocolValues, requestorId, deviceId, deviceName, routingTag, rcode, ttlCap, variable, false, logQuery, logResponse, followCNAMEs, extendedErrorCode, extendedErrorExtra);
2774
2775 ctag = t_pdl->gettag_ffi(params);
70fb28d9
RG
2776 }
2777 else if (t_pdl->d_gettag) {
163ed916 2778 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName, routingTag, proxyProtocolValues);
70fb28d9 2779 }
12aff2e5 2780 }
cca6d6a8
RG
2781 catch (const std::exception& e) {
2782 if (g_logCommonErrors) {
e6a9dde5 2783 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
cca6d6a8 2784 }
12aff2e5 2785 }
8ea8c302 2786 }
b2eacd67 2787 }
cca6d6a8 2788 catch (const std::exception& e)
b2eacd67 2789 {
cca6d6a8 2790 if (g_logCommonErrors) {
e6a9dde5 2791 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
cca6d6a8 2792 }
12aff2e5 2793 }
12ce523e 2794 }
3ddb9247 2795
02b47f43 2796 bool cacheHit = false;
ac10822e 2797 RecursorPacketCache::OptPBData pbData{boost::none};
02b47f43 2798#ifdef HAVE_PROTOBUF
b773359c 2799 if (t_protobufServers) {
845cbf4c 2800 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
0a6a45c8 2801 protobufLogQuery(luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName);
b790ef3d 2802 }
d9d3f9c1
RG
2803 }
2804#endif /* HAVE_PROTOBUF */
02b47f43 2805
70fb28d9
RG
2806 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2807 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2808 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
8467ec26 2809 vState valState;
c15ff3df 2810 if (qnameParsed) {
ac10822e 2811 cacheHit = !SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &pbData);
c15ff3df
RG
2812 }
2813 else {
ac10822e 2814 cacheHit = !SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, &qtype, &qclass, g_now.tv_sec, &response, &age, &valState, &qhash, &pbData);
c15ff3df
RG
2815 }
2816
d9d3f9c1 2817 if (cacheHit) {
fd870915 2818 if (vStateIsBogus(valState)) {
8467ec26
KM
2819 if(t_bogusremotes)
2820 t_bogusremotes->push_back(source);
2821 if(t_bogusqueryring)
2822 t_bogusqueryring->push_back(make_pair(qname, qtype));
2823 }
2824
d9d3f9c1 2825#ifdef HAVE_PROTOBUF
ac10822e 2826 if(t_protobufServers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbData && !pbData->d_tagged)) { // XXX
f29760d2 2827 pdns::ProtoZero::Message pbMessage(pbData ? pbData->d_message : "", pbData ? pbData->d_response : "", 64, 10); // The extra bytes we are going to add
ac10822e 2828 if (pbData) {
f29760d2 2829 // We take the inmutable string from the cache and are appending a few values
a44a8d66 2830 } else {
83b261a7
OM
2831 pbMessage.setType(2); // Response
2832 pbMessage.setServerIdentity(SyncRes::s_serverID);
a44a8d66 2833 }
ac10822e 2834
9e6128d0
OM
2835 // In response part
2836 if (g_useKernelTimestamp && tv.tv_sec) {
83b261a7 2837 pbMessage.setQueryTime(tv.tv_sec, tv.tv_usec);
9e6128d0
OM
2838 }
2839 else {
83b261a7 2840 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
9e6128d0
OM
2841 }
2842 // In message part
5cc8371b 2843 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
5d2e9a83 2844 ComboAddress requestor = requestorNM.getMaskedNetwork();
83b261a7
OM
2845 pbMessage.setMessageIdentity(uniqueId);
2846 pbMessage.setFrom(requestor);
2847 pbMessage.setTo(destination);
2848 pbMessage.setSocketProtocol(false);
2849 pbMessage.setId(dh->id);
2850
2851 pbMessage.setTime();
2852 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
2853 pbMessage.setRequestorId(requestorId);
2854 pbMessage.setDeviceId(deviceId);
2855 pbMessage.setDeviceName(deviceName);
2856 pbMessage.setFromPort(source.getPort());
2857 pbMessage.setToPort(destination.getPort());
47a6825e
OM
2858#ifdef NOD_ENABLED
2859 if (g_nodEnabled) {
83b261a7 2860 pbMessage.setNewlyObservedDomain(false);
47a6825e
OM
2861 }
2862#endif
83b261a7 2863 protobufLogResponse(pbMessage);
02b47f43 2864 }
d9d3f9c1 2865#endif /* HAVE_PROTOBUF */
49a3500d 2866 if(!g_quiet)
e6a9dde5 2867 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 2868
1bc3c142
BH
2869 g_stats.packetCacheHits++;
2870 SyncRes::s_queries++;
2871 ageDNSPacket(response, age);
b71b60ee 2872 struct msghdr msgh;
2873 struct iovec iov;
7bec330a
OM
2874 cmsgbuf_aligned cbuf;
2875 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
2876 msgh.msg_control=NULL;
2877
cbc03320 2878 if(g_fromtosockets.count(fd)) {
7bec330a 2879 addCMsgSrcAddr(&msgh, &cbuf, &destaddr, 0);
b71b60ee 2880 }
2ea1d2c0
OM
2881 int sendErr = sendOnNBSocket(fd, &msgh);
2882 if (sendErr && g_logCommonErrors) {
a2a81d42
OM
2883 g_log << Logger::Warning << "Sending UDP reply to client " << source.toStringWithPort()
2884 << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " failed with: "
2ea1d2c0 2885 << strerror(sendErr) << endl;
a2a81d42 2886 }
97bee66d 2887 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
2888 struct dnsheader tmpdh;
2889 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 2890 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 2891 }
08f3f638 2892 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 2893 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
2894 return 0;
2895 }
3ddb9247 2896 }
1bc3c142 2897 catch(std::exception& e) {
4b4566e8
RG
2898 if(g_logCommonErrors)
2899 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
2900 return 0;
2901 }
3ddb9247 2902
f26bf547 2903 if(t_pdl) {
5cc8371b 2904 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 2905 if(!g_quiet)
e6a9dde5 2906 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 2907 g_stats.policyDrops++;
2908 return 0;
2909 }
2910 }
2911
1bc3c142 2912 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 2913 if(!g_quiet)
e6a9dde5 2914 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 2915
1bc3c142
BH
2916 g_stats.overCapacityDrops++;
2917 return 0;
2918 }
3ddb9247 2919
37a919d4 2920 auto dc = std::unique_ptr<DNSComboWriter>(new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data), std::move(records)));
1bc3c142 2921 dc->setSocket(fd);
49a3500d 2922 dc->d_tag=ctag;
e9f63d47 2923 dc->d_qhash=qhash;
5cc8371b
RG
2924 dc->setRemote(fromaddr);
2925 dc->setSource(source);
b71b60ee 2926 dc->setLocal(destaddr);
5cc8371b 2927 dc->setDestination(destination);
1bc3c142 2928 dc->d_tcp=false;
b40562da
RG
2929 dc->d_ecsFound = ecsFound;
2930 dc->d_ecsParsed = ecsParsed;
2931 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
2932 dc->d_ttlCap = ttlCap;
2933 dc->d_variable = variable;
37a919d4
RG
2934 dc->d_followCNAMERecords = followCNAMEs;
2935 dc->d_rcode = rcode;
406b722e 2936 dc->d_logResponse = logResponse;
aa7929a3 2937#ifdef HAVE_PROTOBUF
b773359c 2938 if (t_protobufServers || t_outgoingProtobufServers) {
5164bac3 2939 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 2940 }
67e31ebe 2941 dc->d_requestorId = requestorId;
590388d2 2942 dc->d_deviceId = deviceId;
0a6a45c8 2943 dc->d_deviceName = deviceName;
c29d820c 2944 dc->d_kernelTimestamp = tv;
aa7929a3 2945#endif
5216ddcc 2946 dc->d_proxyProtocolValues = std::move(proxyProtocolValues);
1b54dc37 2947 dc->d_routingTag = std::move(routingTag);
e95b2a7c
RG
2948 dc->d_extendedErrorCode = extendedErrorCode;
2949 dc->d_extendedErrorExtra = std::move(extendedErrorExtra);
aa7929a3 2950
9a864da4 2951 MT->makeThread(startDoResolve, (void*) dc.release()); // deletes dc
1bc3c142 2952 return 0;
3ddb9247
PD
2953}
2954
b71b60ee 2955
d187038c 2956static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2957{
a683e8bd 2958 ssize_t len;
5216ddcc 2959 static const size_t maxIncomingQuerySize = g_proxyProtocolACL.empty() ? 512 : (512 + g_proxyProtocolMaximumSize);
04896b99 2960 static thread_local std::string data;
5db529f8 2961 ComboAddress fromaddr;
5216ddcc
RG
2962 ComboAddress source;
2963 ComboAddress destination;
b71b60ee 2964 struct msghdr msgh;
2965 struct iovec iov;
7bec330a 2966 cmsgbuf_aligned cbuf;
390f1dab 2967 bool firstQuery = true;
5216ddcc 2968 std::vector<ProxyProtocolValue> proxyProtocolValues;
b71b60ee 2969
c0a00acd 2970 for(size_t queriesCounter = 0; queriesCounter < s_maxUDPQueriesPerRound; queriesCounter++) {
5216ddcc 2971 bool proxyProto = false;
c0a00acd
RG
2972 data.resize(maxIncomingQuerySize);
2973 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
7bec330a 2974 fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2975
c0a00acd 2976 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab 2977
c0a00acd 2978 firstQuery = false;
6b8829d5
RG
2979
2980 if (msgh.msg_flags & MSG_TRUNC) {
2981 g_stats.truncatedDrops++;
2982 if (!g_quiet) {
2983 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toString()<<endl;
2984 }
2985 return;
2986 }
2987
5216ddcc
RG
2988 data.resize(static_cast<size_t>(len));
2989
2990 if (expectProxyProtocol(fromaddr)) {
2991 bool tcp;
8c73c703 2992 ssize_t used = parseProxyHeader(data, proxyProto, source, destination, tcp, proxyProtocolValues);
5216ddcc
RG
2993 if (used <= 0) {
2994 ++g_stats.proxyProtocolInvalidCount;
2995 if (!g_quiet) {
95f851d6 2996 g_log<<Logger::Error<<"Ignoring invalid proxy protocol ("<<std::to_string(len)<<", "<<std::to_string(used)<<") query from "<<fromaddr.toStringWithPort()<<endl;
5216ddcc
RG
2997 }
2998 return;
2999 }
95f851d6
RG
3000 else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
3001 if (g_quiet) {
3002 g_log<<Logger::Error<<"Proxy protocol header in UDP packet from "<< fromaddr.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping"<< endl;
3003 }
3004 ++g_stats.proxyProtocolInvalidCount;
3005 return;
3006 }
3007
5216ddcc
RG
3008 data.erase(0, used);
3009 }
6b8829d5
RG
3010 else if (len > 512) {
3011 /* we only allow UDP packets larger than 512 for those with a proxy protocol header */
3012 g_stats.truncatedDrops++;
3013 if (!g_quiet) {
95f851d6 3014 g_log<<Logger::Error<<"Ignoring truncated query from "<<fromaddr.toStringWithPort()<<endl;
6b8829d5
RG
3015 }
3016 return;
5216ddcc 3017 }
390f1dab 3018
5216ddcc 3019 if (data.size() < sizeof(dnsheader)) {
c0a00acd
RG
3020 g_stats.ignoredCount++;
3021 if (!g_quiet) {
5216ddcc 3022 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(data.size())<<") query from "<<fromaddr.toString()<<endl;
c0a00acd
RG
3023 }
3024 return;
04896b99 3025 }
04896b99 3026
6b8829d5
RG
3027 if (!proxyProto) {
3028 source = fromaddr;
ba892c7f 3029 }
b23b8614 3030
c0a00acd
RG
3031 if(t_remotes) {
3032 t_remotes->push_back(fromaddr);
3033 }
81859ba5 3034
38d8b937 3035 if(t_allowFrom && !t_allowFrom->match(&source)) {
c0a00acd 3036 if(!g_quiet) {
3bdc4508 3037 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<source.toString()<<", address not matched by allow-from"<<endl;
c0a00acd 3038 }
3ddb9247 3039
c0a00acd
RG
3040 g_stats.unauthorizedUDP++;
3041 return;
5db529f8 3042 }
3bdc4508 3043
c0a00acd
RG
3044 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
3045 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3046 if(!g_quiet) {
3047 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
3048 }
3049
3050 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
3051 return;
3abcdab2 3052 }
c0a00acd
RG
3053
3054 try {
c0a00acd
RG
3055 dnsheader* dh=(dnsheader*)&data[0];
3056
3057 if(dh->qr) {
3058 g_stats.ignoredCount++;
3059 if(g_logCommonErrors) {
3060 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
3061 }
3062 }
3063 else if(dh->opcode) {
3064 g_stats.ignoredCount++;
3065 if(g_logCommonErrors) {
3066 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
3067 }
a6147cd2 3068 }
c0f9be19
RG
3069 else if (dh->qdcount == 0) {
3070 g_stats.emptyQueriesCount++;
3071 if(g_logCommonErrors) {
3072 g_log<<Logger::Error<<"Ignoring empty (qdcount == 0) query from "<<fromaddr.toString()<<" on server socket!"<<endl;
3073 }
3074 }
a6147cd2 3075 else {
c0a00acd
RG
3076 struct timeval tv={0,0};
3077 HarvestTimestamp(&msgh, &tv);
3078 ComboAddress dest;
3079 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
3080 auto loc = rplookup(g_listenSocketsAddresses, fd);
3081 if(HarvestDestinationAddress(&msgh, &dest)) {
3082 // but.. need to get port too
3083 if(loc) {
3084 dest.sin4.sin_port = loc->sin4.sin_port;
3085 }
a6147cd2 3086 }
3087 else {
c0a00acd
RG
3088 if(loc) {
3089 dest = *loc;
3090 }
3091 else {
3092 dest.sin4.sin_family = fromaddr.sin4.sin_family;
3093 socklen_t slen = dest.getSocklen();
3094 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
3095 }
3096 }
5216ddcc
RG
3097 if (!proxyProto) {
3098 destination = dest;
3099 }
c0a00acd
RG
3100
3101 if(g_weDistributeQueries) {
6e983628 3102 std::string localdata = data;
d158ef96
OM
3103 distributeAsyncFunction(data, [localdata, fromaddr, dest, source, destination, tv, fd, proxyProtocolValues]() mutable
3104 { return doProcessUDPQuestion(localdata, fromaddr, dest, source, destination, tv, fd, proxyProtocolValues); });
c0a00acd
RG
3105 }
3106 else {
144040be 3107 ++s_threadInfos[t_id].numberOfDistributedQueries;
5216ddcc 3108 doProcessUDPQuestion(data, fromaddr, dest, source, destination, tv, fd, proxyProtocolValues);
a6147cd2 3109 }
3110 }
c0a00acd 3111 }
16ce7f18 3112 catch(const MOADNSException &mde) {
c0a00acd
RG
3113 g_stats.clientParseError++;
3114 if(g_logCommonErrors) {
3115 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
3116 }
3117 }
3118 catch(const std::runtime_error& e) {
3119 g_stats.clientParseError++;
3120 if(g_logCommonErrors) {
3121 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
3122 }
5db529f8
BH
3123 }
3124 }
c0a00acd
RG
3125 else {
3126 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
3127 if(firstQuery && errno == EAGAIN) {
3128 g_stats.noPacketError++;
3129 }
390f1dab 3130
c0a00acd
RG
3131 break;
3132 }
ac0e821b 3133 }
5db529f8
BH
3134}
3135
adb6cd72 3136static void makeTCPServerSockets(deferredAdd_t& deferredAdds, std::set<int>& tcpSockets)
9c495589 3137{
37d3f960 3138 int fd;
f28307ad 3139 vector<string>locals;
2e3d8a19 3140 stringtok(locals,::arg()["local-address"]," ,");
9c495589 3141
f28307ad 3142 if(locals.empty())
3f81d239 3143 throw PDNSException("No local address specified");
3ddb9247 3144
f28307ad 3145 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
3146 ServiceTuple st;
3147 st.port=::arg().asNum("local-port");
3148 parseService(*i, st);
3ddb9247 3149
32252594
BH
3150 ComboAddress sin;
3151
d38e2ba9 3152 sin.reset();
37d3f960 3153 sin.sin4.sin_family = AF_INET;
32252594 3154 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 3155 sin.sin6.sin6_family = AF_INET6;
f71bc087 3156 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 3157 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
3158 }
3159
3160 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 3161 if(fd<0)
3f81d239 3162 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 3163
3897b9e1 3164 setCloseOnExec(fd);
a903b39c 3165
f28307ad 3166 int tmp=1;
810ff705 3167 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 3168 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 3169 exit(1);
f28307ad 3170 }
0dfa94ab 3171 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
a2a81d42
OM
3172 int err = errno;
3173 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
0dfa94ab 3174 }
3175
c8ddb7c2 3176#ifdef TCP_DEFER_ACCEPT
38ac0821 3177 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 3178 if(i==locals.begin())
377602e3 3179 g_log<<Logger::Info<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
3180 }
3181#endif
3182
fec7dd5a
SS
3183 if( ::arg().mustDo("non-local-bind") )
3184 Utility::setBindAny(AF_INET, fd);
3185
665821e1
RG
3186 if (g_reusePort) {
3187#if defined(SO_REUSEPORT_LB)
3188 try {
3189 SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, 1);
3190 }
3191 catch (const std::exception& e) {
3192 throw PDNSException(std::string("SO_REUSEPORT_LB: ") + e.what());
3193 }
3194#elif defined(SO_REUSEPORT)
3195 try {
3196 SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
3197 }
3198 catch (const std::exception& e) {
3199 throw PDNSException(std::string("SO_REUSEPORT: ") + e.what());
3200 }
2332f42d 3201#endif
665821e1 3202 }
2332f42d 3203
0735b17e
RG
3204 if (::arg().asNum("tcp-fast-open") > 0) {
3205#ifdef TCP_FASTOPEN
3206 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
3207 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
a2a81d42
OM
3208 int err = errno;
3209 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(err)<<endl;
0735b17e
RG
3210 }
3211#else
e6a9dde5 3212 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
3213#endif
3214 }
3215
32252594 3216 sin.sin4.sin_port = htons(st.port);
a683e8bd 3217 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 3218 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 3219 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 3220
3897b9e1 3221 setNonBlocking(fd);
49a699c4 3222 setSocketSendBuffer(fd, 65000);
37d3f960 3223 listen(fd, 128);
b243ca3b 3224 deferredAdds.push_back(make_pair(fd, handleNewTCPQuestion));
adb6cd72
RG
3225 tcpSockets.insert(fd);
3226
84433b79 3227 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
3228 // - fd is not that which we know here, but returned from accept()
3ddb9247 3229 if(sin.sin4.sin_family == AF_INET)
377602e3 3230 g_log<<Logger::Info<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 3231 else
377602e3 3232 g_log<<Logger::Info<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 3233 }
9c495589
BH
3234}
3235
b243ca3b 3236static void makeUDPServerSockets(deferredAdd_t& deferredAdds)
288f4aa9 3237{
fec7dd5a 3238 int one=1;
f28307ad 3239 vector<string>locals;
2e3d8a19 3240 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 3241
f28307ad 3242 if(locals.empty())
3f81d239 3243 throw PDNSException("No local address specified");
3ddb9247 3244
f28307ad 3245 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
3246 ServiceTuple st;
3247 st.port=::arg().asNum("local-port");
3248 parseService(*i, st);
3249
37d3f960 3250 ComboAddress sin;
996c89cc 3251
d38e2ba9 3252 sin.reset();
37d3f960 3253 sin.sin4.sin_family = AF_INET;
32252594 3254 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 3255 sin.sin6.sin6_family = AF_INET6;
f71bc087 3256 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 3257 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 3258 }
3ddb9247 3259
bb4bdbaf 3260 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 3261 if(fd < 0) {
a2a81d42 3262 throw PDNSException("Making a UDP server socket for resolver: "+stringerror());
d3b4137e 3263 }
915b0c39 3264 if (!setSocketTimestamps(fd))
e6a9dde5 3265 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 3266
b71b60ee 3267 if(IsAnyAddress(sin)) {
cbc03320 3268 if(sin.sin4.sin_family == AF_INET)
3269 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
3270 g_fromtosockets.insert(fd);
757d3179 3271#ifdef IPV6_RECVPKTINFO
cbc03320 3272 if(sin.sin4.sin_family == AF_INET6)
3273 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
3274 g_fromtosockets.insert(fd);
757d3179 3275#endif
0dfa94ab 3276 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
a2a81d42
OM
3277 int err = errno;
3278 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err)<<endl;
0dfa94ab 3279 }
b71b60ee 3280 }
fec7dd5a
SS
3281 if( ::arg().mustDo("non-local-bind") )
3282 Utility::setBindAny(AF_INET6, fd);
3283
3897b9e1 3284 setCloseOnExec(fd);
a903b39c 3285
4e9a20e6 3286 setSocketReceiveBuffer(fd, 250000);
32252594 3287 sin.sin4.sin_port = htons(st.port);
37d3f960 3288
2332f42d 3289
665821e1
RG
3290 if (g_reusePort) {
3291#if defined(SO_REUSEPORT_LB)
3292 try {
3293 SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, 1);
3294 }
3295 catch (const std::exception& e) {
3296 throw PDNSException(std::string("SO_REUSEPORT_LB: ") + e.what());
3297 }
3298#elif defined(SO_REUSEPORT)
3299 try {
3300 SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
3301 }
3302 catch (const std::exception& e) {
3303 throw PDNSException(std::string("SO_REUSEPORT: ") + e.what());
3304 }
2332f42d 3305#endif
665821e1 3306 }
90f9fbc0
RG
3307
3308 if (sin.isIPv4()) {
3309 try {
3310 setSocketIgnorePMTU(fd);
3311 }
3312 catch(const std::exception& e) {
3313 g_log<<Logger::Warning<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e.what()<<endl;
3314 }
3315 }
3316
3317 socklen_t socklen=sin.getSocklen();
3ddb9247 3318 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 3319 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 3320
3897b9e1 3321 setNonBlocking(fd);
c2136bf0 3322
b243ca3b 3323 deferredAdds.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 3324 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 3325 if(sin.sin4.sin_family == AF_INET)
377602e3 3326 g_log<<Logger::Info<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 3327 else
377602e3 3328 g_log<<Logger::Info<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 3329 }
c836dc19 3330}
caa6eefa 3331
d187038c 3332static void daemonize(void)
c836dc19
BH
3333{
3334 if(fork())
3335 exit(0); // bye bye
3ddb9247
PD
3336
3337 setsid();
c836dc19 3338
27a5ead5 3339 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 3340 if(i < 0)
e6a9dde5 3341 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
3342 else {
3343 dup2(i,0); /* stdin */
3344 dup2(i,1); /* stderr */
3345 dup2(i,2); /* stderr */
3346 close(i);
3347 }
288f4aa9 3348}
caa6eefa 3349
9f374187
FL
3350static void termIntHandler(int)
3351{
cd180a71 3352 doExit();
9f374187
FL
3353}
3354
d187038c 3355static void usr1Handler(int)
c75a6a9e
BH
3356{
3357 statsWanted=true;
3358}
ae1b2e98 3359
d187038c 3360static void usr2Handler(int)
9170fbaf 3361{
f1f34cc2 3362 g_quiet= !g_quiet;
3363 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
3364 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
3365}
3366
5341e3af
OM
/*
 * Return nom / denom as a percentage rounded to the nearest integer,
 * or 0 when denom is 0.
 */
static int ratePercentage(uint64_t nom, uint64_t denom)
{
  return denom == 0 ? 0 : static_cast<int>(round(100.0 * nom / denom));
}
3374
d187038c 3375static void doStats(void)
c75a6a9e 3376{
16beeaa4
BH
3377 static time_t lastOutputTime;
3378 static uint64_t lastQueryCount;
d299d4f5 3379
ccfadb6c
OM
3380 uint64_t cacheHits = g_recCache->cacheHits;
3381 uint64_t cacheMisses = g_recCache->cacheMisses;
3382 uint64_t cacheSize = g_recCache->size();
3383 auto rc_stats = g_recCache->stats();
7ce9aad6 3384 double r = rc_stats.second == 0 ? 0.0 : (100.0 * rc_stats.first / rc_stats.second);
29d75577
OM
3385 uint64_t negCacheSize = g_negCache->size();
3386
d299d4f5 3387 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 3388 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
cdde2458 3389 cacheSize << " cache entries, "<<
29d75577 3390 negCacheSize<<" negative entries, "<<
5341e3af 3391 ratePercentage(cacheHits, cacheHits + cacheMisses)<<"% cache hits"<<endl;
7ce9aad6 3392 g_log << Logger::Notice<< "stats: cache contended/acquired " << rc_stats.first << '/' << rc_stats.second << " = " << r << '%' << endl;
3ddb9247 3393
e6a9dde5 3394 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 3395 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
60e5208a 3396 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<", failed ns: "
bbc7101c
OM
3397 << broadcastAccFunction<uint64_t>(pleaseGetFailedServersSize)<<", ednsmap: "
3398 <<broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize)<<endl;
5341e3af
OM
3399 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries)<<"%";
3400 g_log<<Logger::Notice<<", "<<ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries+SyncRes::s_throttledqueries)<<"% throttled, "
525b8a7c 3401 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 3402 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 3403 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 3404
5341e3af
OM
3405 uint64_t pcSize = broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize);
3406 uint64_t pcHits = broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits);
3407 g_log<<Logger::Notice<<"stats: " << pcSize <<
3408 " packet cache entries, "<< ratePercentage(pcHits, SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 3409
144040be
RG
3410 size_t idx = 0;
3411 for (const auto& threadInfo : s_threadInfos) {
3412 if(threadInfo.isWorker) {
ad9fc3dc 3413 g_log<<Logger::Notice<<"stats: thread "<<idx<<" has been distributed "<<threadInfo.numberOfDistributedQueries<<" queries"<<endl;
144040be
RG
3414 ++idx;
3415 }
3416 }
3417
16beeaa4
BH
3418 time_t now = time(0);
3419 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 3420 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
3421 }
3422 lastOutputTime = now;
3423 lastQueryCount = SyncRes::s_queries;
c75a6a9e 3424 }
3ddb9247 3425 else if(statsWanted)
e6a9dde5 3426 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 3427
c75a6a9e
BH
3428 statsWanted=false;
3429}
c836dc19 3430
29f0b1ce 3431static void houseKeeping(void *)
c836dc19 3432{
cdde2458
OM
3433 static thread_local time_t last_rootupdate, last_secpoll, last_trustAnchorUpdate{0}, last_RC_prune;
3434 static thread_local struct timeval last_prune;
3435
3337c2f7
RG
3436 static thread_local int cleanCounter=0;
3437 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
e4ae55e5
PL
3438 auto luaconfsLocal = g_luaconfs.getLocal();
3439
3440 if (last_trustAnchorUpdate == 0 && !luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0) {
3441 // Loading the Lua config file already "refreshed" the TAs
3442 last_trustAnchorUpdate = g_now.tv_sec + luaconfsLocal->trustAnchorFileInfo.interval * 3600;
3443 }
3444
cc59bce6 3445 try {
6b0d90ea 3446 if(s_running) {
cc59bce6 3447 return;
6b0d90ea 3448 }
cc59bce6 3449 s_running=true;
3ddb9247 3450
b9715061
OM
3451 struct timeval now, past;
3452 Utility::gettimeofday(&now, nullptr);
3453 past = now;
3454 past.tv_sec -= 5;
3455 if (last_prune < past) {
a6f7f5fe 3456 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 3457
b9715061 3458 time_t limit;
cc59bce6 3459 if(!((cleanCounter++)%40)) { // this is a full scan!
b9715061 3460 limit=now.tv_sec-300;
a712cb56 3461 SyncRes::pruneNSSpeeds(limit);
cc59bce6 3462 }
b9715061
OM
3463 limit = now.tv_sec - SyncRes::s_serverdownthrottletime * 10;
3464 SyncRes::pruneFailedServers(limit);
3465 limit = now.tv_sec - 2*3600;
3466 SyncRes::pruneEDNSStatuses(limit);
3467 SyncRes::pruneThrottledServers();
3468 Utility::gettimeofday(&last_prune, nullptr);
d67620e4 3469 }
3ddb9247 3470
b243ca3b 3471 if(isHandlerThread()) {
cdde2458 3472 if (now.tv_sec - last_RC_prune > 5) {
ccfadb6c
OM
3473 g_recCache->doPrune(g_maxCacheEntries);
3474 g_negCache->prune(g_maxCacheEntries / 10);
cdde2458
OM
3475 last_RC_prune = now.tv_sec;
3476 }
3477 // XXX !!! global
d8f3508b
OM
3478 if (now.tv_sec - last_rootupdate > 7200) {
3479 int res = SyncRes::getRootNS(g_now, nullptr, 0);
cdde2458
OM
3480 if (!res) {
3481 last_rootupdate=now.tv_sec;
0ac327bc
RG
3482 try {
3483 primeRootNSZones(g_dnssecmode != DNSSECMode::Off, 0);
3484 }
3485 catch (const std::exception& e) {
3486 g_log<<Logger::Error<<"Exception while priming the root NS zones: "<<e.what()<<endl;
3487 }
3488 catch (const PDNSException& e) {
3489 g_log<<Logger::Error<<"Exception while priming the root NS zones: "<<e.reason<<endl;
3490 }
3491 catch (const ImmediateServFailException& e) {
3492 g_log<<Logger::Error<<"Exception while priming the root NS zones: "<<e.reason<<endl;
3493 }
3494 catch (const PolicyHitException& e) {
3495 g_log<<Logger::Error<<"Policy hit while priming the root NS zones"<<endl;
3496 }
3497 catch (...)
3498 {
3499 g_log<<Logger::Error<<"Exception while priming the root NS zones"<<endl;
3500 }
cdde2458
OM
3501 }
3502 }
3ddb9247 3503
cc59bce6 3504 if(now.tv_sec - last_secpoll >= 3600) {
3505 try {
3506 doSecPoll(&last_secpoll);
3507 }
0ac327bc 3508 catch (const std::exception& e)
581d4ea3 3509 {
e6a9dde5 3510 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 3511 }
0ac327bc 3512 catch (const PDNSException& e)
47e9b74f 3513 {
e6a9dde5 3514 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 3515 }
0ac327bc 3516 catch (const ImmediateServFailException &e)
d0992a65 3517 {
e6a9dde5 3518 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 3519 }
0ac327bc 3520 catch (const PolicyHitException& e) {
124dd1d4
RG
3521 g_log<<Logger::Error<<"Policy hit while performing security poll"<<endl;
3522 }
0ac327bc 3523 catch (...)
47e9b74f 3524 {
e6a9dde5 3525 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 3526 }
18b73338 3527 }
e4ae55e5
PL
3528
3529 if (!luaconfsLocal->trustAnchorFileInfo.fname.empty() && luaconfsLocal->trustAnchorFileInfo.interval != 0 &&
3530 g_now.tv_sec - last_trustAnchorUpdate >= (luaconfsLocal->trustAnchorFileInfo.interval * 3600)) {
3531 g_log<<Logger::Debug<<"Refreshing Trust Anchors from file"<<endl;
3532 try {
3533 map<DNSName, dsmap_t> dsAnchors;
3534 if (updateTrustAnchorsFromFile(luaconfsLocal->trustAnchorFileInfo.fname, dsAnchors)) {
3535 g_luaconfs.modify([&dsAnchors](LuaConfigItems& lci) {
3536 lci.dsAnchors = dsAnchors;
3537 });
3538 }
3539 last_trustAnchorUpdate = now.tv_sec;
3540 } catch (const PDNSException &pe) {
3541 g_log<<Logger::Error<<"Unable to update Trust Anchors: "<<pe.reason<<endl;
3542 }
3543 }
d67620e4 3544 }
0ac327bc
RG
3545 s_running = false;
3546 }
3547 catch (const PDNSException& ae)
3548 {
3549 s_running = false;
3550 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
3551 throw;
3552 }
3553 catch (...)
3554 {
3555 s_running = false;
3556 g_log<<Logger::Error<<"Uncaught exception in housekeeping thread"<<endl;
3557 throw;
d67620e4 3558 }
779828c4 3559}
d6d5dea7 3560
d187038c 3561static void makeThreadPipes()
49a699c4 3562{
ee271fc4
RG
3563 auto pipeBufferSize = ::arg().asNum("distribution-pipe-buffer-size");
3564 if (pipeBufferSize > 0) {
3565 g_log<<Logger::Info<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize<<endl;
3566 }
3567
b243ca3b
RG
3568 /* thread 0 is the handler / SNMP, we start at 1 */
3569 for(unsigned int n = 1; n <= (g_numWorkerThreads + g_numDistributorThreads); ++n) {
3570 auto& threadInfos = s_threadInfos.at(n);
3571
49a699c4
BH
3572 int fd[2];
3573 if(pipe(fd) < 0)
3574 unixDie("Creating pipe for inter-thread communications");
3ddb9247 3575
b243ca3b
RG
3576 threadInfos.pipes.readToThread = fd[0];
3577 threadInfos.pipes.writeToThread = fd[1];
3ddb9247 3578
49a699c4
BH
3579 if(pipe(fd) < 0)
3580 unixDie("Creating pipe for inter-thread communications");
b243ca3b
RG
3581
3582 threadInfos.pipes.readFromThread = fd[0];
3583 threadInfos.pipes.writeFromThread = fd[1];
3ddb9247 3584
cf8cda18
RG
3585 if(pipe(fd) < 0)
3586 unixDie("Creating pipe for inter-thread communications");
d10307c5 3587
b243ca3b
RG
3588 threadInfos.pipes.readQueriesToThread = fd[0];
3589 threadInfos.pipes.writeQueriesToThread = fd[1];
3590
ee271fc4
RG
3591 if (pipeBufferSize > 0) {
3592 if (!setPipeBufferSize(threadInfos.pipes.writeQueriesToThread, pipeBufferSize)) {
a2a81d42
OM
3593 int err = errno;
3594 g_log<<Logger::Warning<<"Error resizing the buffer of the distribution pipe for thread "<<n<<" to "<<pipeBufferSize<<": "<<strerror(err)<<endl;
ee271fc4
RG
3595 auto existingSize = getPipeBufferSize(threadInfos.pipes.writeQueriesToThread);
3596 if (existingSize > 0) {
3597 g_log<<Logger::Warning<<"The current size of the distribution pipe's buffer for thread "<<n<<" is "<<existingSize<<endl;
3598 }
3599 }
3600 }
3601
b243ca3b 3602 if (!setNonBlocking(threadInfos.pipes.writeQueriesToThread)) {
d10307c5
RG
3603 unixDie("Making pipe for inter-thread communications non-blocking");
3604 }
49a699c4
BH
3605 }
3606}
3607
00c9b8c1
BH
3608struct ThreadMSG
3609{
3610 pipefunc_t func;
3611 bool wantAnswer;
3612};
3613
b4e76a18 3614void broadcastFunction(const pipefunc_t& func)
49a699c4 3615{
b243ca3b
RG
3616 /* This function might be called by the worker with t_id 0 during startup
3617 for the initialization of ACLs and domain maps. After that it should only
3618 be called by the handler. */
d77abca1 3619
b243ca3b
RG
3620 if (s_threadInfos.empty() && isHandlerThread()) {
3621 /* the handler and distributors will call themselves below, but
3622 during startup we get called while s_threadInfos has not been
3623 populated yet to update the ACL or domain maps, so we need to
3624 handle that case.
3625 */
3626 func();
3627 }
b4e76a18 3628
b243ca3b
RG
3629 unsigned int n = 0;
3630 for (const auto& threadInfo : s_threadInfos) {
49a699c4 3631 if(n++ == t_id) {
b4e76a18 3632 func(); // don't write to ourselves!
49a699c4
BH
3633 continue;
3634 }
3ddb9247 3635
00c9b8c1
BH
3636 ThreadMSG* tmsg = new ThreadMSG();
3637 tmsg->func = func;
3638 tmsg->wantAnswer = true;
b243ca3b 3639 if(write(threadInfo.pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
b841314c 3640 delete tmsg;
b243ca3b 3641
49a699c4 3642 unixDie("write to thread pipe returned wrong size or error");
b841314c 3643 }
3ddb9247 3644
49467864 3645 string* resp = nullptr;
b243ca3b 3646 if(read(threadInfo.pipes.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
49a699c4 3647 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3648
49a699c4 3649 if(resp) {
49a699c4 3650 delete resp;
49467864 3651 resp = nullptr;
49a699c4
BH
3652 }
3653 }
3654}
06ea9015 3655
592d7ade 3656static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
00c9b8c1 3657{
144040be 3658 auto& targetInfo = s_threadInfos[target];
b243ca3b
RG
3659 if(!targetInfo.isWorker) {
3660 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl;
d77abca1 3661 exit(1);
00c9b8c1 3662 }
d77abca1 3663
b243ca3b 3664 const auto& tps = targetInfo.pipes;
3ddb9247 3665
cf8cda18
RG
3666 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
3667 if (written > 0) {
3668 if (static_cast<size_t>(written) != sizeof(tmsg)) {
3669 delete tmsg;
3670 unixDie("write to thread pipe returned wrong size or error");
3671 }
3672 }
3673 else {
3674 int error = errno;
cf8cda18 3675 if (error == EAGAIN || error == EWOULDBLOCK) {
592d7ade 3676 return false;
cf8cda18 3677 } else {
592d7ade 3678 delete tmsg;
17634427 3679 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 3680 }
b841314c 3681 }
592d7ade 3682
144040be
RG
3683 ++targetInfo.numberOfDistributedQueries;
3684
592d7ade
RG
3685 return true;
3686}
3687
144040be
RG
3688static unsigned int getWorkerLoad(size_t workerIdx)
3689{
3690 const auto mt = s_threadInfos[/* skip handler */ 1 + g_numDistributorThreads + workerIdx].mt;
3691 if (mt != nullptr) {
3692 return mt->numProcesses();
3693 }
3694 return 0;
3695}
3696
3697static unsigned int selectWorker(unsigned int hash)
3698{
3699 if (s_balancingFactor == 0) {
3700 return /* skip handler */ 1 + g_numDistributorThreads + (hash % g_numWorkerThreads);
3701 }
3702
3703 /* we start with one, representing the query we are currently handling */
3704 double currentLoad = 1;
3705 std::vector<unsigned int> load(g_numWorkerThreads);
3706 for (size_t idx = 0; idx < g_numWorkerThreads; idx++) {
3707 load[idx] = getWorkerLoad(idx);
3708 currentLoad += load[idx];
3709 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3710 }
3711
3712 double targetLoad = (currentLoad / g_numWorkerThreads) * s_balancingFactor;
3713 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3714
3715 unsigned int worker = hash % g_numWorkerThreads;
1b9d2d46 3716 /* at least one server has to be at or below the average load */
596bf482
RG
3717 if (load[worker] > targetLoad) {
3718 ++g_stats.rebalancedQueries;
3719 do {
3720 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3721 worker = (worker + 1) % g_numWorkerThreads;
3722 }
3723 while(load[worker] > targetLoad);
144040be
RG
3724 }
3725
3726 return /* skip handler */ 1 + g_numDistributorThreads + worker;
3727}
3728
592d7ade
RG
3729// This function is only called by the distributor threads, when pdns-distributes-queries is set
3730void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
3731{
3732 if (!isDistributorThread()) {
3733 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
3734 exit(1);
3735 }
3736
3737 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
144040be 3738 unsigned int target = selectWorker(hash);
592d7ade
RG
3739
3740 ThreadMSG* tmsg = new ThreadMSG();
3741 tmsg->func = func;
3742 tmsg->wantAnswer = false;
3743
3744 if (!trySendingQueryToWorker(target, tmsg)) {
3745 /* if this function failed but did not raise an exception, it means that the pipe
3746 was full, let's try another one */
3747 unsigned int newTarget = 0;
3748 do {
3749 newTarget = /* skip handler */ 1 + g_numDistributorThreads + dns_random(g_numWorkerThreads);
3750 } while (newTarget == target);
3751
3752 if (!trySendingQueryToWorker(newTarget, tmsg)) {
3753 g_stats.queryPipeFullDrops++;
3754 delete tmsg;
3755 }
3756 }
00c9b8c1 3757}
3427fa8a 3758
d187038c 3759static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 3760{
f26bf547 3761 ThreadMSG* tmsg = nullptr;
3ddb9247 3762
cf8cda18 3763 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
3764 unixDie("read from thread pipe returned wrong size or error");
3765 }
3ddb9247 3766
2f22827a 3767 void *resp=0;
3768 try {
3769 resp = tmsg->func();
3770 }
3771 catch(std::exception& e) {
6d2010a8 3772 if(g_logCommonErrors)
e6a9dde5 3773 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3774 }
3775 catch(PDNSException& e) {
6d2010a8 3776 if(g_logCommonErrors)
e6a9dde5 3777 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 3778 }
d7c676a5 3779 if(tmsg->wantAnswer) {
b243ca3b
RG
3780 const auto& threadInfo = s_threadInfos.at(t_id);
3781 if(write(threadInfo.pipes.writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
d7c676a5 3782 delete tmsg;
00c9b8c1 3783 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
3784 }
3785 }
3ddb9247 3786
00c9b8c1 3787 delete tmsg;
49a699c4 3788}
09e6702a 3789
13034931
BH
3790template<class T> void *voider(const boost::function<T*()>& func)
3791{
3792 return func();
3793}
3794
050e6877 3795static vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
92011b8f 3796{
3797 a.insert(a.end(), b.begin(), b.end());
3798 return a;
3799}
3800
050e6877 3801static vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
3ddb9247
PD
3802{
3803 a.insert(a.end(), b.begin(), b.end());
3804 return a;
3805}
3806
92011b8f 3807
387b9ca6
RG
3808/*
3809 This function should only be called by the handler to gather metrics, wipe the cache,
788eeb4c
RG
3810 reload the Lua script (not the Lua config) or change the current trace regex,
3811 and by the SNMP thread to gather metrics. */
b4e76a18 3812template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 3813{
b243ca3b 3814 if (!isHandlerThread()) {
788eeb4c 3815 g_log<<Logger::Error<<"broadcastAccFunction has been called by a worker ("<<t_id<<")"<<endl;
d77abca1 3816 exit(1);
d77abca1
RG
3817 }
3818
b243ca3b 3819 unsigned int n = 0;
3427fa8a 3820 T ret=T();
b243ca3b
RG
3821 for (const auto& threadInfo : s_threadInfos) {
3822 if (n++ == t_id) {
3823 continue;
3824 }
3825
3826 const auto& tps = threadInfo.pipes;
00c9b8c1 3827 ThreadMSG* tmsg = new ThreadMSG();
a88c05b2 3828 tmsg->func = [func]{ return voider<T>(func); };
00c9b8c1 3829 tmsg->wantAnswer = true;
3ddb9247 3830
b841314c
RG
3831 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
3832 delete tmsg;
3427fa8a 3833 unixDie("write to thread pipe returned wrong size or error");
b841314c 3834 }
3ddb9247 3835
49467864 3836 T* resp = nullptr;
3427fa8a
BH
3837 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
3838 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 3839
3427fa8a 3840 if(resp) {
3427fa8a
BH
3841 ret += *resp;
3842 delete resp;
49467864 3843 resp = nullptr;
3427fa8a
BH
3844 }
3845 }
3846 return ret;
3847}
3848
b4e76a18
RG
3849template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
3850template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
3851template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
3852template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
5ac6d761 3853template ThreadTimes broadcastAccFunction(const boost::function<ThreadTimes*()>& fun);
3427fa8a 3854
d187038c 3855static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3856{
fbfc1809
RG
3857 try {
3858 string remote;
3859 string msg=s_rcc.recv(&remote);
3860 RecursorControlParser rcp;
3861 RecursorControlParser::func_t* command;
3ddb9247 3862
9aad5288 3863 g_log << Logger::Notice << "Received rec_control command '" << msg << "' from control socket" << endl;
fbfc1809 3864 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0 3865
fbfc1809
RG
3866 // If we are inside a chroot, we need to strip
3867 if (!arg()["chroot"].empty()) {
3868 size_t len = arg()["chroot"].length();
3869 remote = remote.substr(len);
3870 }
f0f3f0b0 3871
ab5c053d
BH
3872 s_rcc.send(answer, &remote);
3873 command();
3874 }
fbfc1809 3875 catch(const std::exception& e) {
e6a9dde5 3876 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 3877 }
fbfc1809 3878 catch(const PDNSException& ae) {
e6a9dde5 3879 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 3880 }
09e6702a
BH
3881}
3882
d187038c 3883static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3884{
dc593046 3885 PacketID* pident=boost::any_cast<PacketID>(&var);
667f7e60 3886 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 3887
dc593046 3888 boost::shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 3889
a683e8bd 3890 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 3891 if(ret > 0) {
667f7e60 3892 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 3893 pident->inNeeded-=(size_t)ret;
825fa717 3894 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
3895 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3896 PacketID pid=*pident;
3897 string msg=pident->inMSG;
3ddb9247 3898
bb4bdbaf 3899 t_fdm->removeReadFD(fd);
3ddb9247 3900 MT->sendEvent(pid, &msg);
09e6702a
BH
3901 }
3902 else {
667f7e60 3903 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
3904 }
3905 }
3906 else {
667f7e60 3907 PacketID tmp=*pident;
bb4bdbaf 3908 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
3909 string empty;
3910 MT->sendEvent(tmp, &empty); // this conveys error status
3911 }
3912}
3913
d187038c 3914static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3915{
dc593046 3916 PacketID* pid=boost::any_cast<PacketID>(&var);
a683e8bd 3917 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 3918 if(ret > 0) {
a683e8bd 3919 pid->outPos+=(ssize_t)ret;
667f7e60
BH
3920 if(pid->outPos==pid->outMSG.size()) {
3921 PacketID tmp=*pid;
bb4bdbaf 3922 t_fdm->removeWriteFD(fd);
09e6702a
BH
3923 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
3924 }
3925 }
3926 else { // error or EOF
667f7e60 3927 PacketID tmp(*pid);
bb4bdbaf 3928 t_fdm->removeWriteFD(fd);
09e6702a 3929 string sent;
998a4334 3930 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
3931 }
3932}
3933
34801ab1 3934// resend event to everybody chained onto it
d187038c 3935static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1 3936{
c647a254
OM
3937 // We close the chain for new entries, since they won't be processed anyway
3938 iter->key.closed = true;
3939
34801ab1
BH
3940 if(iter->key.chain.empty())
3941 return;
e27e91a8 3942 // cerr<<"doResends called!\n";
34801ab1
BH
3943 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
3944 resend.fd=-1;
3945 resend.id=*i;
e27e91a8 3946 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 3947
34801ab1
BH
3948 MT->sendEvent(resend, &content);
3949 g_stats.chainResends++;
34801ab1
BH
3950 }
3951}
3952
d187038c 3953static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 3954{
dc593046 3955 PacketID pid=boost::any_cast<PacketID>(var);
a683e8bd 3956 ssize_t len;
fae8fe07
RG
3957 std::string packet;
3958 packet.resize(g_outgoingEDNSBufsize);
996c89cc 3959 ComboAddress fromaddr;
09e6702a
BH
3960 socklen_t addrlen=sizeof(fromaddr);
3961
fae8fe07 3962 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 3963
a683e8bd 3964 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 3965 if(len < 0)
996c89cc 3966 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 3967 else {
3ddb9247 3968 g_stats.serverParseError++;
09e6702a 3969 if(g_logCommonErrors)
e6a9dde5 3970 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 3971 ": packet smaller than DNS header"<<endl;
998a4334 3972 }
34801ab1 3973
49a699c4 3974 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
3975 string empty;
3976
3977 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 3978 if(iter != MT->d_waiters.end())
34801ab1 3979 doResends(iter, pid, empty);
3ddb9247 3980
34801ab1 3981 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 3982 return;
3ddb9247 3983 }
998a4334 3984
fae8fe07 3985 packet.resize(len);
998a4334 3986 dnsheader dh;
fae8fe07 3987 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 3988
6da3b3ad
PD
3989 PacketID pident;
3990 pident.remote=fromaddr;
3991 pident.id=dh.id;
3992 pident.fd=fd;
34801ab1 3993
33a928af 3994 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 3995 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
3996 }
3997
3998 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
3999 !dh.qr) { // one weird server
4000 pident.domain.clear();
4001 pident.type = 0;
4002 }
4003 else {
4004 try {
0b31e67e 4005 if(len > 12)
fae8fe07 4006 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
4007 }
4008 catch(std::exception& e) {
4009 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 4010 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 4011 return;
34801ab1 4012 }
6da3b3ad 4013 }
34801ab1 4014
6da3b3ad
PD
4015 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
4016 if(iter != MT->d_waiters.end()) {
4017 doResends(iter, pident, packet);
4018 }
c1da7976 4019
6da3b3ad 4020retryWithName:
4957a608 4021
6da3b3ad 4022 if(!MT->sendEvent(pident, &packet)) {
9ec48f21
RG
4023 /* we did not find a match for this response, something is wrong */
4024
6da3b3ad 4025 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
027e8f47
RG
4026 for (MT_t::waiters_t::iterator mthread = MT->d_waiters.begin(); mthread != MT->d_waiters.end(); ++mthread) {
4027 if (pident.fd == mthread->key.fd && mthread->key.remote == pident.remote && mthread->key.type == pident.type &&
e325f20c 4028 pident.domain == mthread->key.domain) {
027e8f47
RG
4029 /* we are expecting an answer from that exact source, on that exact port (since we are using connected sockets), for that qname/qtype,
4030 but with a different message ID. That smells like a spoofing attempt. For now we will just increase the counter and will deal with
4031 that later. */
6da3b3ad 4032 mthread->key.nearMisses++;
998a4334 4033 }
6da3b3ad
PD
4034
4035 // be a bit paranoid here since we're weakening our matching
3ddb9247 4036 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
4037 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
4038 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
4039 pident.domain = mthread->key.domain;
4040 pident.type = mthread->key.type;
4041 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 4042 }
09e6702a 4043 }
6da3b3ad
PD
4044 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
4045 if(g_logCommonErrors) {
e6a9dde5 4046 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 4047 }
09e6702a 4048 }
6da3b3ad 4049 else if(fd >= 0) {
9ec48f21 4050 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
6da3b3ad
PD
4051 t_udpclientsocks->returnSocket(fd);
4052 }
09e6702a
BH
4053}
4054
050e6877 4055static FDMultiplexer* getMultiplexer()
1f4abb20
BH
4056{
4057 FDMultiplexer* ret;
f26bf547 4058 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 4059 try {
f26bf547 4060 ret=i.second();
1f4abb20
BH
4061 return ret;
4062 }
98d0ee4a 4063 catch(FDMultiplexerException &fe) {
e6a9dde5 4064 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
4065 }
4066 catch(...) {
e6a9dde5 4067 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 4068 }
1f4abb20 4069 }
e6a9dde5 4070 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
4071 exit(1);
4072}
4073
3ddb9247 4074
d187038c 4075static string* doReloadLuaScript()
4485aa35 4076{
674cf0f6 4077 string fname= ::arg()["lua-dns-script"];
4485aa35 4078 try {
674cf0f6 4079 if(fname.empty()) {
f26bf547 4080 t_pdl.reset();
377602e3 4081 g_log<<Logger::Info<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 4082 return new string("unloaded\n");
4485aa35
BH
4083 }
4084 else {
9694e14f
AT
4085 t_pdl = std::make_shared<RecursorLua4>();
4086 t_pdl->loadFile(fname);
4485aa35
BH
4087 }
4088 }
fdbf35ac 4089 catch(std::exception& e) {
e6a9dde5 4090 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 4091 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 4092 }
3ddb9247 4093
e6a9dde5 4094 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 4095 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
4096}
4097
49a699c4
BH
4098string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
4099{
3ddb9247 4100 if(begin != end)
49a699c4 4101 ::arg().set("lua-dns-script") = *begin;
3ddb9247 4102
0f39c1a3 4103 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 4104}
49a699c4 4105
d187038c 4106static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
4107try
4108{
4109 if(newRegex.empty()) {
f26bf547 4110 t_traceRegex.reset();
77499b05
BH
4111 return new string("unset\n");
4112 }
4113 else {
f26bf547 4114 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
4115 return new string("ok\n");
4116 }
4117}
3f81d239 4118catch(PDNSException& ae)
77499b05
BH
4119{
4120 return new string(ae.reason+"\n");
4121}
4122
4123string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
4124{
a88c05b2 4125 return broadcastAccFunction<string>([=]{ return pleaseUseNewTraceRegex(begin!=end ? *begin : ""); });
77499b05
BH
4126}
4127
4e9a20e6 4128static void checkLinuxIPv6Limits()
4129{
4130#ifdef __linux__
4131 string line;
4132 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 4133 int lim=std::stoi(line);
4e9a20e6 4134 if(lim < 16384) {
e6a9dde5 4135 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 4136 }
4137 }
4138#endif
4139}
36849ff2 4140static void checkOrFixFDS()
4e9a20e6 4141{
c0063e60 4142 unsigned int availFDs=getFilenumLimit();
4143 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
4144
4145 if(wantFDs > availFDs) {
067ad20e 4146 unsigned int hardlimit= getFilenumLimit(true);
4147 if(hardlimit >= wantFDs) {
c0063e60 4148 setFilenumLimit(wantFDs);
e6a9dde5 4149 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 4150 }
4151 else {
067ad20e 4152 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 4153 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 4154 g_maxMThreads = newval;
067ad20e 4155 setFilenumLimit(hardlimit);
36849ff2 4156 }
4157 }
4e9a20e6 4158}
77499b05 4159
c390b2da 4160static void* recursorThread(unsigned int tid, const string& threadName);
51e2144e 4161
f26bf547 4162static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
4163{
4164 t_allowFrom = ng;
f26bf547 4165 return nullptr;
49a699c4
BH
4166}
4167
dbd23fc2
BH
// Copies of the command line; parseACLs() hands them to ::arg().preParse().
int g_argc{0};
char** g_argv{nullptr};
4170
18af64a8 4171void parseACLs()
f7c1d4e3 4172{
18af64a8 4173 static bool l_initialized;
3ddb9247 4174
49a699c4 4175 if(l_initialized) { // only reload configuration file on second call
18af64a8 4176 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
4177 if(::arg()["config-name"]!="") {
4178 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4179 }
18af64a8 4180 cleanSlashes(configname);
3ddb9247
PD
4181
4182 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 4183 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 4184 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 4185 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
4186 ::arg().preParse(g_argc, g_argv, "include-dir");
4187
4188 // then process includes
4189 std::vector<std::string> extraConfigs;
242b90e1
AT
4190 ::arg().gatherIncludes(extraConfigs);
4191
1dc8f4d0 4192 for(const std::string& fn : extraConfigs) {
7e818521 4193 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
4194 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
4195 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
4196 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 4197 }
ca2c884c
AT
4198
4199 ::arg().preParse(g_argc, g_argv, "allow-from-file");
4200 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 4201 }
49a699c4 4202
f26bf547
RG
4203 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
4204 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 4205
2c95fc65
BH
4206 if(!::arg()["allow-from-file"].empty()) {
4207 string line;
2c95fc65
BH
4208 ifstream ifs(::arg()["allow-from-file"].c_str());
4209 if(!ifs) {
9c61b9d0 4210 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
4211 }
4212
4213 string::size_type pos;
4214 while(getline(ifs,line)) {
4215 pos=line.find('#');
4216 if(pos!=string::npos)
4217 line.resize(pos);
dc593046 4218 boost::trim(line);
2c95fc65
BH
4219 if(line.empty())
4220 continue;
4221
18af64a8 4222 allowFrom->addMask(line);
2c95fc65 4223 }
e6a9dde5 4224 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
4225 }
4226 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
4227 vector<string> ips;
4228 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 4229
e6a9dde5 4230 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 4231 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 4232 allowFrom->addMask(*i);
f7c1d4e3 4233 if(i!=ips.begin())
e6a9dde5
PL
4234 g_log<<Logger::Warning<<", ";
4235 g_log<<Logger::Warning<<*i;
f7c1d4e3 4236 }
e6a9dde5 4237 g_log<<Logger::Warning<<endl;
f7c1d4e3 4238 }
49a699c4 4239 else {
3ddb9247 4240 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
377602e3 4241 g_log<<Logger::Warning<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 4242 allowFrom = nullptr;
49a699c4 4243 }
3ddb9247 4244
49a699c4 4245 g_initialAllowFrom = allowFrom;
a88c05b2 4246 broadcastFunction([=]{ return pleaseSupplantACLs(allowFrom); });
f26bf547 4247 oldAllowFrom = nullptr;
3ddb9247 4248
49a699c4 4249 l_initialized = true;
18af64a8
BH
4250}
4251
795215f2 4252
756e82cf 4253static void setupDelegationOnly()
4254{
4255 vector<string> parts;
4256 stringtok(parts, ::arg()["delegation-only"], ", \t");
4257 for(const auto& p : parts) {
9065eb05 4258 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 4259 }
4260}
795215f2 4261
8fd25133
RG
4262static std::map<unsigned int, std::set<int> > parseCPUMap()
4263{
4264 std::map<unsigned int, std::set<int> > result;
4265
4266 const std::string value = ::arg()["cpu-map"];
4267
4268 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 4269 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
4270 return result;
4271 }
4272
4273 std::vector<std::string> parts;
4274
4275 stringtok(parts, value, " \t");
4276
4277 for(const auto& part : parts) {
4278 if (part.find('=') == string::npos)
4279 continue;
4280
4281 try {
4282 auto headers = splitField(part, '=');
dc593046
OM
4283 boost::trim(headers.first);
4284 boost::trim(headers.second);
8fd25133
RG
4285
4286 unsigned int threadId = pdns_stou(headers.first);
4287 std::vector<std::string> cpus;
4288
4289 stringtok(cpus, headers.second, ",");
4290
4291 for(const auto& cpu : cpus) {
4292 int cpuId = std::stoi(cpu);
4293
4294 result[threadId].insert(cpuId);
4295 }
4296 }
4297 catch(const std::exception& e) {
e6a9dde5 4298 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
4299 }
4300 }
4301
4302 return result;
4303}
4304
4305static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
4306{
4307 const auto& cpuMapping = cpusMap.find(n);
4308 if (cpuMapping != cpusMap.cend()) {
4309 int rc = mapThreadToCPUList(tid, cpuMapping->second);
4310 if (rc == 0) {
e6a9dde5 4311 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 4312 for (const auto cpu : cpuMapping->second) {
e6a9dde5 4313 g_log<<Logger::Info<<" "<<cpu;
8fd25133 4314 }
e6a9dde5 4315 g_log<<Logger::Info<<endl;
8fd25133
RG
4316 }
4317 else {
e6a9dde5 4318 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 4319 for (const auto cpu : cpuMapping->second) {
e6a9dde5 4320 g_log<<Logger::Info<<" "<<cpu;
8fd25133 4321 }
e6a9dde5 4322 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
4323 }
4324 }
4325}
4326
af1377b7
NC
4327#ifdef NOD_ENABLED
4328static void setupNODThread()
4329{
4330 if (g_nodEnabled) {
b78727c6
NC
4331 uint32_t num_cells = ::arg().asNum("new-domain-db-size");
4332 t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
af1377b7
NC
4333 try {
4334 t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
4335 }
4336 catch (const PDNSException& e) {
4337 g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
4338 _exit(1);
4339 }
4340 if (!t_nodDBp->init()) {
4341 g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
4342 _exit(1);
4343 }
41c542ec 4344 std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
af1377b7 4345 t.detach();
ca2526f5 4346 g_nod_pbtag = ::arg()["new-domain-pb-tag"];
41c542ec
NC
4347 }
4348 if (g_udrEnabled) {
b78727c6
NC
4349 uint32_t num_cells = ::arg().asNum("unique-response-db-size");
4350 t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
41c542ec
NC
4351 try {
4352 t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
4353 }
4354 catch (const PDNSException& e) {
4355 g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
4356 _exit(1);
4357 }
4358 if (!t_udrDBp->init()) {
4359 g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
4360 _exit(1);
4361 }
4362 std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
af1377b7 4363 t.detach();
ca2526f5 4364 g_udr_pbtag = ::arg()["unique-response-pb-tag"];
af1377b7
NC
4365 }
4366}
4367
050e6877 4368static void parseNODWhitelist(const std::string& wlist)
af1377b7
NC
4369{
4370 vector<string> parts;
4371 stringtok(parts, wlist, ",; ");
4372 for(const auto& a : parts) {
4373 g_nodDomainWL.add(DNSName(a));
4374 }
4375}
4376
4377static void setupNODGlobal()
4378{
4379 // Setup NOD subsystem
4380 g_nodEnabled = ::arg().mustDo("new-domain-tracking");
4381 g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
4382 g_nodLog = ::arg().mustDo("new-domain-log");
4383 parseNODWhitelist(::arg()["new-domain-whitelist"]);
41c542ec
NC
4384
4385 // Setup Unique DNS Response subsystem
4386 g_udrEnabled = ::arg().mustDo("unique-response-tracking");
4387 g_udrLog = ::arg().mustDo("unique-response-log");
af1377b7
NC
4388}
4389#endif /* NOD_ENABLED */
4390
c6042a88 4391static void checkSocketDir(void)
0127f6bd
OM
4392{
4393 struct stat st;
4394 string dir(::arg()["socket-dir"]);
4395 string msg;
c6042a88 4396
0127f6bd
OM
4397 if (stat(dir.c_str(), &st) == -1) {
4398 msg = "it does not exist or cannot access";
4399 }
4400 else if (!S_ISDIR(st.st_mode)) {
4401 msg = "it is not a directory";
4402 }
4403 else if (access(dir.c_str(), R_OK | W_OK | X_OK) != 0) {
4404 msg = "cannot read, write or search";
4405 } else {
4406 return;
4407 }
4408 g_log << Logger::Error << "Problem with socket directory " << dir << ": " << msg << "; see https://docs.powerdns.com/recursor/upgrade.html#x-to-4-3-0-or-master" << endl;
4409 _exit(1);
4410}
4411
d187038c 4412static int serviceMain(int argc, char*argv[])
18af64a8 4413{
bff61896
OM
4414 int ret = EXIT_SUCCESS;
4415
e6a9dde5
PL
4416 g_log.setName(s_programname);
4417 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
4418 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
4419
4420 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
4421 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
4422 if(val >= 0)
e6a9dde5 4423 g_log.setFacility(val);
18af64a8 4424 else
e6a9dde5 4425 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
4426 }
4427
ba1a571d 4428 showProductVersion();
3afde9b2 4429
06ea9015 4430 g_disthashseed=dns_random(0xffffffff);
4431
b7ef5828
PL
4432 checkLinuxIPv6Limits();
4433 try {
20829585 4434 pdns::parseQueryLocalAddress(::arg()["query-local-address"]);
b7ef5828
PL
4435 }
4436 catch(std::exception& e) {
e6a9dde5 4437 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
4438 exit(99);
4439 }
4440
7b1d1a7d
PL
4441 if(pdns::isQueryLocalAddressFamilyEnabled(AF_INET)) {
4442 SyncRes::s_doIPv4=true;
4443 g_log<<Logger::Warning<<"Enabling IPv4 transport for outgoing queries"<<endl;
4444 }
4445 else {
094ab9a3 4446 g_log<<Logger::Warning<<"NOT using IPv4 for outgoing queries - add an IPv4 address (like '0.0.0.0') to query-local-address to enable"<<endl;
7b1d1a7d
PL
4447 }
4448
4449
20829585
PL
4450 if(pdns::isQueryLocalAddressFamilyEnabled(AF_INET6)) {
4451 SyncRes::s_doIPv6=true;
4452 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
4453 }
4454 else {
ea02eeba 4455 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - add an IPv6 address (like '::') to query-local-address to enable"<<endl;
20829585
PL
4456 }
4457
76a89c3a
PL
4458 if (!SyncRes::s_doIPv6 && !SyncRes::s_doIPv4) {
4459 g_log<<Logger::Error<<"No outgoing addresses configured! Can not continue"<<endl;
094ab9a3
PL
4460 exit(99);
4461 }
4462
e48c6b8a
PL
4463 // keep this ABOVE loadRecursorLuaConfig!
4464 if(::arg()["dnssec"]=="off")
4465 g_dnssecmode=DNSSECMode::Off;
4466 else if(::arg()["dnssec"]=="process-no-validate")
4467 g_dnssecmode=DNSSECMode::ProcessNoValidate;
4468 else if(::arg()["dnssec"]=="process")
4469 g_dnssecmode=DNSSECMode::Process;
4470 else if(::arg()["dnssec"]=="validate")
4471 g_dnssecmode=DNSSECMode::ValidateAll;
4472 else if(::arg()["dnssec"]=="log-fail")
4473 g_dnssecmode=DNSSECMode::ValidateForLog;
4474 else {
e6a9dde5 4475 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
4476 exit(1);
4477 }
4478
9a3ab3e4
KM
4479 g_signatureInceptionSkew = ::arg().asNum("signature-inception-skew");
4480 if (g_signatureInceptionSkew < 0) {
4481 g_log<<Logger::Error<<"A negative value for 'signature-inception-skew' is not allowed"<<endl;
4482 exit(1);
4483 }
4484
e48c6b8a 4485 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 4486 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 4487
a6f7f5fe 4488 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
4489 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
e6ec15bf
RG
4490
4491 luaConfigDelayedThreads delayedLuaThreads;
0f5785a6 4492 try {
e6ec15bf 4493 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads);
0f5785a6
PL
4494 }
4495 catch (PDNSException &e) {
e6a9dde5 4496 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
4497 exit(1);
4498 }
ad42489c 4499
18af64a8 4500 parseACLs();
d6f3fcfa 4501 initPublicSuffixList(::arg()["public-suffix-list-file"]);
92011b8f 4502
eb5bae86 4503 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
4504 vector<string> ips;
4505 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
4506 ips.push_back("0.0.0.0");
4507 ips.push_back("::");
c36bc97a 4508
e6a9dde5 4509 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 4510 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 4511 SyncRes::addDontQuery(*i);
eb5bae86 4512 if(i!=ips.begin())
e6a9dde5
PL
4513 g_log<<Logger::Warning<<", ";
4514 g_log<<Logger::Warning<<*i;
eb5bae86 4515 }
e6a9dde5 4516 g_log<<Logger::Warning<<endl;
eb5bae86
BH
4517 }
4518
f7c1d4e3 4519 g_quiet=::arg().mustDo("quiet");
3ddb9247 4520
b243ca3b 4521 /* this needs to be done before parseACLs(), which call broadcastFunction() */
1bc3c142
BH
4522 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
4523 if(g_weDistributeQueries) {
b243ca3b 4524 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 4525 }
3ddb9247 4526
756e82cf 4527 setupDelegationOnly();
b33c2462 4528 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 4529
77499b05
BH
4530 if(::arg()["trace"]=="fail") {
4531 SyncRes::setDefaultLogMode(SyncRes::Store);
4532 }
4533 else if(::arg().mustDo("trace")) {
4534 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
4535 ::arg().set("quiet")="no";
4536 g_quiet=false;
3e9c6c0a 4537 g_dnssecLOG=true;
f7c1d4e3 4538 }
43a9b290
PL
4539 string myHostname = getHostname();
4540 if (myHostname == "UNKNOWN"){
4541 g_log<<Logger::Warning<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl;
4542 myHostname = "";
d0983bff 4543 }
3ddb9247 4544
aadceba8 4545 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
5cf4b2e7 4546 SyncRes::s_minimumECSTTL = ::arg().asNum("ecs-minimum-ttl-override");
aadceba8 4547
1051f8a9
BH
4548 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
4549
f7c1d4e3 4550 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
b9473937 4551 SyncRes::s_maxbogusttl=::arg().asNum("max-cache-bogus-ttl");
63637fd8 4552 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 4553 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
4554 // Cap the packetcache-servfail-ttl to the packetcache-ttl
4555 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
4556 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
4557 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
4558 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 4559 SyncRes::s_serverID=::arg()["server-id"];
173d790e 4560 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
86f95f85 4561 SyncRes::s_maxnsaddressqperq=::arg().asNum("max-ns-address-qperq");
9de3e034 4562 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 4563 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 4564 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3 4565 if(SyncRes::s_serverID.empty()) {
d0983bff 4566 SyncRes::s_serverID = myHostname;
f7c1d4e3 4567 }
3ddb9247 4568
e9f9b8ec
RG
4569 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
4570 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
c9783016 4571 SyncRes::clearECSStats();
fd8898fb 4572 SyncRes::s_ecsipv4cachelimit = ::arg().asNum("ecs-ipv4-cache-bits");
4573 SyncRes::s_ecsipv6cachelimit = ::arg().asNum("ecs-ipv6-cache-bits");
ed9019c9 4574 SyncRes::s_ecscachelimitttl = ::arg().asNum("ecs-cache-limit-ttl");
e9f9b8ec 4575
116d1288 4576 SyncRes::s_qnameminimization = ::arg().mustDo("qname-minimization");
d40a915b 4577
409b8398
OM
4578 if (SyncRes::s_qnameminimization) {
4579 // With an empty cache, a rev ipv6 query with dnssec enabled takes
4580 // almost 100 queries. Default maxqperq is 60.
13c43bdd 4581 SyncRes::s_maxqperq = std::max(SyncRes::s_maxqperq, static_cast<unsigned int>(100));
409b8398
OM
4582 }
4583
d40a915b
OM
4584 SyncRes::s_hardenNXD = SyncRes::HardenNXD::DNSSEC;
4585 string value = ::arg()["nothing-below-nxdomain"];
4586 if (value == "yes") {
4587 SyncRes::s_hardenNXD = SyncRes::HardenNXD::Yes;
4588 } else if (value == "no") {
4589 SyncRes::s_hardenNXD = SyncRes::HardenNXD::No;
4590 } else if (value != "dnssec") {
4591 g_log << Logger::Error << "Unknown nothing-below-nxdomain mode: " << value << endl;
4592 exit(1);
4593 }
116d1288 4594
8a3a3822
RG
4595 if (!::arg().isEmpty("ecs-scope-zero-address")) {
4596 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
4597 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
4598 }
4599 else {
20829585
PL
4600 Netmask nm;
4601 bool done = false;
4602
4603 auto addr = pdns::getNonAnyQueryLocalAddress(AF_INET);
4604 if (addr.sin4.sin_family != 0) {
4605 nm = Netmask(addr, 32);
4606 done = true;
8a3a3822 4607 }
20829585
PL
4608 if (!done) {
4609 addr = pdns::getNonAnyQueryLocalAddress(AF_INET6);
4610 if (addr.sin4.sin_family != 0) {
4611 nm = Netmask(addr, 128);
4612 done = true;
8a3a3822
RG
4613 }
4614 }
20829585
PL
4615 if (!done) {
4616 nm = Netmask(ComboAddress("127.0.0.1"), 32);
8a3a3822 4617 }
20829585 4618 SyncRes::setECSScopeZeroAddress(nm);
8a3a3822
RG
4619 }
4620
2fe3354d
CH
4621 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
4622 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
4623 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
4624
5cc8371b 4625 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 4626 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 4627
38d8b937 4628 g_proxyProtocolACL.toMasks(::arg()["proxy-protocol-from"]);
5216ddcc
RG
4629 g_proxyProtocolMaximumSize = ::arg().asNum("proxy-protocol-maximum-size");
4630
ef3ee606 4631 if (!::arg()["dns64-prefix"].empty()) {
75e31a0b
RG
4632 try {
4633 auto dns64Prefix = Netmask(::arg()["dns64-prefix"]);
4634 if (dns64Prefix.getBits() != 96) {
e9c3c56f 4635 g_log << Logger::Error << "Invalid prefix for 'dns64-prefix', the current implementation only supports /96 prefixes: " << ::arg()["dns64-prefix"] << endl;
75e31a0b
RG
4636 exit(1);
4637 }
4638 g_dns64Prefix = dns64Prefix.getNetwork();
4639 g_dns64PrefixReverse = reverseNameFromIP(*g_dns64Prefix);
4640 /* /96 is 24 nibbles + 2 for "ip6.arpa." */
4641 while (g_dns64PrefixReverse.countLabels() > 26) {
4642 g_dns64PrefixReverse.chopOff();
4643 }
ef3ee606 4644 }
75e31a0b
RG
4645 catch (const NetmaskException& ne) {
4646 g_log << Logger::Error << "Invalid prefix '" << ::arg()["dns64-prefix"] << "' for 'dns64-prefix': " << ne.reason << endl;
4647 exit(1);
ef3ee606
RG
4648 }
4649 }
4650
5b0ddd18 4651 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 4652
49a699c4 4653 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 4654
08f3f638 4655 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 4656
f7c1d4e3 4657 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 4658 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
4659
4660 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
4661 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
4662
b3adda56
PD
4663 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
4664
b243ca3b 4665 g_numDistributorThreads = ::arg().asNum("distributor-threads");
810ff705 4666 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 4667 if (g_numWorkerThreads < 1) {
e6a9dde5 4668 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
4669 g_numWorkerThreads = 1;
4670 }
4671
b243ca3b 4672 g_numThreads = g_numDistributorThreads + g_numWorkerThreads;
810ff705
RG
4673 g_maxMThreads = ::arg().asNum("max-mthreads");
4674
c51c551e
OM
4675
4676 int64_t maxInFlight = ::arg().asNum("max-concurrent-requests-per-tcp-connection");
4677 if (maxInFlight < 1 || maxInFlight > USHRT_MAX || maxInFlight >= g_maxMThreads) {
4678 g_log<<Logger::Warning<<"Asked to run with illegal max-concurrent-requests-per-tcp-connection, setting to default (10)"<<endl;
4679 TCPConnection::s_maxInFlight = 10;
4680 } else {
4681 TCPConnection::s_maxInFlight = maxInFlight;
4682 }
c51c551e 4683
00b8cadc
RG
4684 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
4685
0ec489bf 4686 g_statisticsInterval = ::arg().asNum("statistics-interval");
4687
699bc10a 4688 s_addExtendedResolutionDNSErrors = ::arg().mustDo("extended-resolution-errors");
94e2a9b0 4689
559b6c93
PL
4690 {
4691 SuffixMatchNode dontThrottleNames;
4692 vector<string> parts;
52858314 4693 stringtok(parts, ::arg()["dont-throttle-names"], " ,");
559b6c93
PL
4694 for (const auto &p : parts) {
4695 dontThrottleNames.add(DNSName(p));
4696 }
d514bd03 4697 g_dontThrottleNames.setState(std::move(dontThrottleNames));
559b6c93 4698
bc3d2b73 4699 parts.clear();
559b6c93 4700 NetmaskGroup dontThrottleNetmasks;
52858314 4701 stringtok(parts, ::arg()["dont-throttle-netmasks"], " ,");
559b6c93
PL
4702 for (const auto &p : parts) {
4703 dontThrottleNetmasks.addMask(Netmask(p));
4704 }
d514bd03 4705 g_dontThrottleNetmasks.setState(std::move(dontThrottleNetmasks));
559b6c93
PL
4706 }
4707
144040be 4708 s_balancingFactor = ::arg().asDouble("distribution-load-factor");
078be17f
RG
4709 if (s_balancingFactor != 0.0 && s_balancingFactor < 1.0) {
4710 s_balancingFactor = 0.0;
4711 g_log<<Logger::Warning<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl;
4712 }
144040be 4713
810ff705
RG
4714#ifdef SO_REUSEPORT
4715 g_reusePort = ::arg().mustDo("reuseport");
4716#endif
4717
b243ca3b 4718 s_threadInfos.resize(g_numDistributorThreads + g_numWorkerThreads + /* handler */ 1);
810ff705 4719
b243ca3b
RG
4720 if (g_reusePort) {
4721 if (g_weDistributeQueries) {
4722 /* first thread is the handler, then distributors */
4723 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4724 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 4725 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 4726 makeUDPServerSockets(deferredAdds);
adb6cd72 4727 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b
RG
4728 }
4729 }
4730 else {
4731 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4732 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4733 auto& deferredAdds = s_threadInfos.at(threadId).deferredAdds;
adb6cd72 4734 auto& tcpSockets = s_threadInfos.at(threadId).tcpSockets;
b243ca3b 4735 makeUDPServerSockets(deferredAdds);
adb6cd72 4736 makeTCPServerSockets(deferredAdds, tcpSockets);
b243ca3b 4737 }
810ff705
RG
4738 }
4739 }
4740 else {
c47f201b 4741 std::set<int> tcpSockets;
b243ca3b
RG
4742 /* we don't have reuseport so we can only open one socket per
4743 listening addr:port and everyone will listen on it */
4744 makeUDPServerSockets(g_deferredAdds);
c47f201b
RG
4745 makeTCPServerSockets(g_deferredAdds, tcpSockets);
4746
4747 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
4748 needs to listen to the shared sockets */
4749 if (g_weDistributeQueries) {
4750 /* first thread is the handler, then distributors */
4751 for (unsigned int threadId = 1; threadId <= g_numDistributorThreads; threadId++) {
4752 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4753 }
4754 }
4755 else {
4756 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4757 for (unsigned int threadId = 1; threadId <= g_numWorkerThreads; threadId++) {
4758 s_threadInfos.at(threadId).tcpSockets = tcpSockets;
4759 }
4760 }
810ff705 4761 }
815099b2 4762
af1377b7
NC
4763#ifdef NOD_ENABLED
4764 // Setup newly observed domain globals
4765 setupNODGlobal();
4766#endif /* NOD_ENABLED */
4767
677e2a46
BH
4768 int forks;
4769 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
4770 if(!fork()) // we are child
4771 break;
4772 }
3ddb9247 4773
f7c1d4e3 4774 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
4775 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
4776 g_log.toConsole(Logger::Critical);
f7c1d4e3
BH
4777 daemonize();
4778 }
cd180a71 4779 if(Utility::getpid() == 1) {
807db6c8
FL
4780 /* We are running as pid 1, register sigterm and sigint handler
4781
4782 The Linux kernel will handle SIGTERM and SIGINT for all processes, except PID 1.
4783 It assumes that the processes running as pid 1 is an "init" like system.
4784 For years, this was a safe assumption, but containers change that: in
4785 most (all?) container implementations, the application itself is running
4786 as pid 1. This means that sending signals to those applications, will not
ef2ea4bf 4787 be handled by default. Results might be "your container not responding
dda61e20
FL
4788 when asking it to stop", or "ctrl-c not working even when the app is
4789 running in the foreground inside a container".
807db6c8
FL
4790
4791 So TL;DR: If we're running pid 1 (container), we should handle SIGTERM and SIGINT ourselves */
4792
cd180a71
FL
4793 signal(SIGTERM,termIntHandler);
4794 signal(SIGINT,termIntHandler);
4795 }
4796
f7c1d4e3
BH
4797 signal(SIGUSR1,usr1Handler);
4798 signal(SIGUSR2,usr2Handler);
4799 signal(SIGPIPE,SIG_IGN);
810ff705 4800
a6414fdc 4801 checkOrFixFDS();
3ddb9247 4802
d1b28475
KM
4803#ifdef HAVE_LIBSODIUM
4804 if (sodium_init() == -1) {
e6a9dde5 4805 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
4806 exit(99);
4807 }
4808#endif
4809
3afde9b2
PL
4810 openssl_thread_setup();
4811 openssl_seed();
e97cb679
AT
4812 /* setup rng before chroot */
4813 dns_random_init();
3afde9b2 4814
bdbb07e0 4815 if(::arg()["server-id"].empty()) {
d0983bff 4816 ::arg().set("server-id") = myHostname;
bdbb07e0
PL
4817 }
4818
138435cb
BH
4819 int newgid=0;
4820 if(!::arg()["setgid"].empty())
2211dac9 4821 newgid = strToGID(::arg()["setgid"]);
138435cb
BH
4822 int newuid=0;
4823 if(!::arg()["setuid"].empty())
2211dac9 4824 newuid = strToUID(::arg()["setuid"]);
138435cb 4825
f1d6a7ce
KM
4826 Utility::dropGroupPrivs(newuid, newgid);
4827
138435cb 4828 if (!::arg()["chroot"].empty()) {
75336810
PL
4829#ifdef HAVE_SYSTEMD
4830 char *ns;
4831 ns = getenv("NOTIFY_SOCKET");
4832 if (ns != nullptr) {
e6a9dde5 4833 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
4834 exit(1);
4835 }
4836#endif
138435cb 4837 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
a2a81d42
OM
4838 int err = errno;
4839 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (err)<<", exiting"<<endl;
4840 exit(1);
138435cb 4841 }
f0f3f0b0 4842 else
377602e3 4843 g_log<<Logger::Info<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
4844 }
4845
c6042a88
OM
4846 checkSocketDir();
4847
f0f3f0b0
PL
4848 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
4849 if(!s_pidfname.empty())
4850 unlink(s_pidfname.c_str()); // remove possible old pid file
4851 writePid();
4852
4853 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
4854
f1d6a7ce 4855 Utility::dropUserPrivs(newuid);
1f2b341e
RG
4856 try {
4857 /* we might still have capabilities remaining, for example if we have been started as root
4858 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4859 like CAP_NET_BIND_SERVICE.
4860 */
4861 dropCapabilities();
4862 }
4863 catch(const std::exception& e) {
4864 g_log<<Logger::Warning<<e.what()<<endl;
4865 }
c0063e60 4866
e6ec15bf
RG
4867 startLuaConfigDelayedThreads(delayedLuaThreads, g_luaconfs.getCopy().generation);
4868
49a699c4 4869 makeThreadPipes();
3ddb9247 4870
5d4dd7fe
BH
4871 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4872 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 4873 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 4874 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 4875
c29d820c
RG
4876 g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp");
4877
563517f3
RG
4878 blacklistStats(StatComponent::API, ::arg()["stats-api-blacklist"]);
4879 blacklistStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]);
4880 blacklistStats(StatComponent::RecControl, ::arg()["stats-rec-control-blacklist"]);
4881 blacklistStats(StatComponent::SNMP, ::arg()["stats-snmp-blacklist"]);
72259676 4882
d705aad9
RG
4883 if (::arg().mustDo("snmp-agent")) {
4884 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
4885 g_snmpAgent->run();
4886 }
4887
b47026fd 4888 int port = ::arg().asNum("udp-source-port-min");
58da9034 4889 if(port < 1024 || port > 65535){
e6a9dde5 4890 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
4891 exit(99); // this isn't going to fix itself either
4892 }
4893 s_minUdpSourcePort = port;
b47026fd 4894 port = ::arg().asNum("udp-source-port-max");
58da9034 4895 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 4896 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
4897 exit(99); // this isn't going to fix itself either
4898 }
4899 s_maxUdpSourcePort = port;
4900 std::vector<string> parts {};
b47026fd 4901 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
4902 for (const auto &part : parts)
4903 {
4904 port = std::stoi(part);
58da9034 4905 if(port < 1024 || port > 65535){
e6a9dde5 4906 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
4907 exit(99); // this isn't going to fix itself either
4908 }
4909 s_avoidUdpSourcePorts.insert(port);
4910 }
4911
b243ca3b 4912 unsigned int currentThreadId = 1;
8fd25133 4913 const auto cpusMap = parseCPUMap();
d77abca1 4914
c3828c03 4915 if(g_numThreads == 1) {
e6a9dde5 4916 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
4917#ifdef HAVE_SYSTEMD
4918 sd_notify(0, "READY=1");
4919#endif
b243ca3b
RG
4920
4921 /* This thread handles the web server, carbon, statistics and the control channel */
4922 auto& handlerInfos = s_threadInfos.at(0);
4923 handlerInfos.isHandler = true;
c390b2da 4924 handlerInfos.thread = std::thread(recursorThread, 0, "main");
b243ca3b
RG
4925
4926 setCPUMap(cpusMap, currentThreadId, pthread_self());
4927
4928 auto& infos = s_threadInfos.at(currentThreadId);
4929 infos.isListener = true;
4930 infos.isWorker = true;
c390b2da 4931 recursorThread(currentThreadId++, "worker");
8657c2af
OM
4932
4933 handlerInfos.thread.join();
bff61896
OM
4934 if (handlerInfos.exitCode != 0) {
4935 ret = handlerInfos.exitCode;
4936 }
76698c6e
BH
4937 }
4938 else {
8fd25133 4939
ef31b090
OM
4940
4941 if (g_weDistributeQueries) {
4942 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4943 auto& infos = s_threadInfos.at(currentThreadId + n);
4944 infos.isListener = true;
4945 }
4946 }
4947 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4948 auto& infos = s_threadInfos.at(currentThreadId + (g_weDistributeQueries ? g_numDistributorThreads : 0) + n);
4949 infos.isListener = !g_weDistributeQueries;
4950 infos.isWorker = true;
4951 }
4952
b243ca3b
RG
4953 if (g_weDistributeQueries) {
4954 g_log<<Logger::Warning<<"Launching "<< g_numDistributorThreads <<" distributor threads"<<endl;
4955 for(unsigned int n=0; n < g_numDistributorThreads; ++n) {
4956 auto& infos = s_threadInfos.at(currentThreadId);
c390b2da 4957 infos.thread = std::thread(recursorThread, currentThreadId++, "distr");
b243ca3b
RG
4958 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
4959 }
4960 }
8fd25133 4961
62b549e0
RG
4962 g_log<<Logger::Warning<<"Launching "<< g_numWorkerThreads <<" worker threads"<<endl;
4963
b243ca3b
RG
4964 for(unsigned int n=0; n < g_numWorkerThreads; ++n) {
4965 auto& infos = s_threadInfos.at(currentThreadId);
c390b2da 4966 infos.thread = std::thread(recursorThread, currentThreadId++, "worker");
b243ca3b 4967 setCPUMap(cpusMap, currentThreadId, infos.thread.native_handle());
76698c6e 4968 }
b243ca3b 4969
6b6720de
PL
4970#ifdef HAVE_SYSTEMD
4971 sd_notify(0, "READY=1");
4972#endif
b243ca3b
RG
4973
4974 /* This thread handles the web server, carbon, statistics and the control channel */
4975 auto& infos = s_threadInfos.at(0);
4976 infos.isHandler = true;
c390b2da 4977 infos.thread = std::thread(recursorThread, 0, "web+stat");
b243ca3b 4978
8657c2af
OM
4979 for (auto & ti : s_threadInfos) {
4980 ti.thread.join();
bff61896
OM
4981 if (ti.exitCode != 0) {
4982 ret = ti.exitCode;
4983 }
8657c2af 4984 }
bb4bdbaf 4985 }
da966ae0
OM
4986
4987#ifdef HAVE_PROTOBUF
4988 google::protobuf::ShutdownProtobufLibrary();
4989#endif /* HAVE_PROTOBUF */
bff61896 4990 return ret;
bb4bdbaf
BH
4991}
4992
c390b2da 4993static void* recursorThread(unsigned int n, const string& threadName)
bb4bdbaf
BH
4994try
4995{
d77abca1 4996 t_id=n;
b243ca3b 4997 auto& threadInfo = s_threadInfos.at(t_id);
c390b2da
PL
4998
4999 static string threadPrefix = "pdns-r/";
519f5484 5000 setThreadName(threadPrefix + threadName);
c390b2da 5001
49a699c4 5002 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 5003 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 5004 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
5005 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
5006 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
b3dd32d2 5007
eb7fae64 5008 if (threadInfo.isHandler) {
b3dd32d2 5009 if (!primeHints()) {
bff61896 5010 threadInfo.exitCode = EXIT_FAILURE;
dc3831aa 5011 RecursorControlChannel::stop = 1;
bff61896
OM
5012 g_log<<Logger::Critical<<"Priming cache failed, stopping"<<endl;
5013 return nullptr;
b3dd32d2 5014 }
eb7fae64
OM
5015 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
5016 }
3ddb9247 5017
f26bf547 5018 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 5019
3ddb9247 5020
af1377b7 5021#ifdef NOD_ENABLED
41c542ec
NC
5022 if (threadInfo.isWorker)
5023 setupNODThread();
af1377b7 5024#endif /* NOD_ENABLED */
c1751a59
RG
5025
5026 /* the listener threads handle TCP queries */
5027 if(threadInfo.isWorker || threadInfo.isListener) {
5b388d28
PD
5028 try {
5029 if(!::arg()["lua-dns-script"].empty()) {
5030 t_pdl = std::make_shared<RecursorLua4>();
5031 t_pdl->loadFile(::arg()["lua-dns-script"]);
5032 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
5033 }
5034 }
5035 catch(std::exception &e) {
5036 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
5037 _exit(99);
674cf0f6 5038 }
674cf0f6 5039 }
3ddb9247 5040
f8f243b0 5041 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 5042 if(ringsize) {
f26bf547 5043 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
b243ca3b
RG
5044 if(g_weDistributeQueries)
5045 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads);
f8f243b0 5046 else
3ddb9247 5047 t_remotes->set_capacity(ringsize);
f26bf547 5048 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 5049 t_servfailremotes->set_capacity(ringsize);
66f2e6ad
KM
5050 t_bogusremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
5051 t_bogusremotes->set_capacity(ringsize);
f26bf547 5052 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 5053 t_largeanswerremotes->set_capacity(ringsize);
621ccf89 5054 t_timeouts = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
5055 t_timeouts->set_capacity(ringsize);
92011b8f 5056
f26bf547 5057 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 5058 t_queryring->set_capacity(ringsize);
f26bf547 5059 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 5060 t_servfailqueryring->set_capacity(ringsize);
66f2e6ad
KM
5061 t_bogusqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
5062 t_bogusqueryring->set_capacity(ringsize);
92011b8f 5063 }
f26bf547 5064 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
144040be 5065 threadInfo.mt = MT.get();
3ddb9247 5066
63341e8d
RG
5067#ifdef HAVE_PROTOBUF
5068 /* start protobuf export threads if needed */
5069 auto luaconfsLocal = g_luaconfs.getLocal();
5070 checkProtobufExport(luaconfsLocal);
5071 checkOutgoingProtobufExport(luaconfsLocal);
5072#endif /* HAVE_PROTOBUF */
b9fa43e0
OM
5073#ifdef HAVE_FSTRM
5074 checkFrameStreamExport(luaconfsLocal);
5075#endif
63341e8d 5076
bb4bdbaf
BH
5077 PacketID pident;
5078
5079 t_fdm=getMultiplexer();
d77abca1 5080
da966ae0
OM
5081 RecursorWebServer *rws = nullptr;
5082
b243ca3b 5083 if(threadInfo.isHandler) {
d07bf7ff 5084 if(::arg().mustDo("webserver")) {
e6a9dde5 5085 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 5086 try {
da966ae0 5087 rws = new RecursorWebServer(t_fdm);
8989097d
CH
5088 }
5089 catch(PDNSException &e) {
e6a9dde5 5090 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
5091 exit(99);
5092 }
f3d1d67b 5093 }
377602e3 5094 g_log<<Logger::Info<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 5095 }
810ff705 5096 else {
d77abca1 5097
b243ca3b
RG
5098 t_fdm->addReadFD(threadInfo.pipes.readToThread, handlePipeRequest);
5099 t_fdm->addReadFD(threadInfo.pipes.readQueriesToThread, handlePipeRequest);
5100
5101 if (threadInfo.isListener) {
5102 if (g_reusePort) {
5103 /* then every listener has its own FDs */
7af99dff 5104 for(const auto& deferred : threadInfo.deferredAdds) {
b243ca3b
RG
5105 t_fdm->addReadFD(deferred.first, deferred.second);
5106 }
810ff705 5107 }
b243ca3b
RG
5108 else {
5109 /* otherwise all listeners are listening on the same ones */
7af99dff 5110 for(const auto& deferred : g_deferredAdds) {
b243ca3b 5111 t_fdm->addReadFD(deferred.first, deferred.second);
d77abca1
RG
5112 }
5113 }
5114 }
810ff705 5115 }
3ddb9247 5116
b0b37121 5117 registerAllStats();
d77abca1 5118
b243ca3b 5119 if(threadInfo.isHandler) {
674cf0f6
BH
5120 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
5121 }
1bc3c142 5122
f7c1d4e3 5123 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 5124
f7c1d4e3 5125 bool listenOnTCP(true);
49a699c4 5126
cb1523d1 5127 time_t last_stat = 0;
a2f87dd1 5128 time_t last_carbon=0, last_lua_maintenance=0;
2c78bd57 5129 time_t carbonInterval=::arg().asNum("carbon-interval");
a2f87dd1 5130 time_t luaMaintenanceInterval=::arg().asNum("lua-maintenance-interval");
ac0995bb 5131 counter.store(0); // used to periodically execute certain tasks
8657c2af
OM
5132
5133 while (!RecursorControlChannel::stop) {
ac0e821b 5134 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 5135
eb7fae64
OM
5136 // Use primes, it avoid not being scheduled in cases where the counter has a regular pattern.
5137 // We want to call handler thread often, it gets scheduled about 2 times per second
5138 if ((threadInfo.isHandler && counter % 11 == 0) || counter % 499 == 0) {
3427fa8a 5139 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
5140 }
5141
d2392145 5142 if(!(counter%55)) {
d8f6d49f 5143 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 5144 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 5145
f7c1d4e3 5146 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
dc593046 5147 shared_ptr<TCPConnection> conn=boost::any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 5148 if(g_logCommonErrors)
e6a9dde5 5149 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 5150 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
5151 }
5152 }
3ddb9247 5153
f7c1d4e3
BH
5154 counter++;
5155
b243ca3b 5156 if(threadInfo.isHandler) {
cb1523d1
RG
5157 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
5158 doStats();
5159 last_stat = g_now.tv_sec;
5160 }
f7c1d4e3 5161
cb1523d1 5162 Utility::gettimeofday(&g_now, 0);
2c78bd57 5163
cb1523d1
RG
5164 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
5165 MT->makeThread(doCarbonDump, 0);
5166 last_carbon = g_now.tv_sec;
5167 }
2c78bd57 5168 }
2a0276a9 5169 if (t_pdl != nullptr) {
9adbe790 5170 // lua-dns-script directive is present, call the maintenance callback if needed
c1751a59
RG
5171 /* remember that the listener threads handle TCP queries */
5172 if (threadInfo.isWorker || threadInfo.isListener) {
2a0276a9
CHB
5173 // Only on threads processing queries
5174 if(g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) {
5175 t_pdl->maintenance();
5176 last_lua_maintenance = g_now.tv_sec;
5177 }
9adbe790 5178 }
a2f87dd1 5179 }
2c78bd57 5180
bb4bdbaf 5181 t_fdm->run(&g_now);
3ea54bf0 5182 // 'run' updates g_now for us
f7c1d4e3 5183
b243ca3b 5184 if(threadInfo.isListener) {
5c889cf5 5185 if(listenOnTCP) {
c47f201b
RG
5186 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
5187 for(const auto fd : threadInfo.tcpSockets) {
5188 t_fdm->removeReadFD(fd);
b243ca3b 5189 }
c47f201b
RG
5190 listenOnTCP=false;
5191 }
f7c1d4e3 5192 }
5c889cf5 5193 else {
c47f201b
RG
5194 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
5195 for(const auto fd : threadInfo.tcpSockets) {
5196 t_fdm->addReadFD(fd, handleNewTCPQuestion);
b243ca3b 5197 }
c47f201b
RG
5198 listenOnTCP=true;
5199 }
f7c1d4e3
BH
5200 }
5201 }
5202 }
da966ae0
OM
5203 delete rws;
5204 delete t_fdm;
8657c2af 5205 return 0;
f7c1d4e3 5206}
3f81d239 5207catch(PDNSException &ae) {
e6a9dde5 5208 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
5209 return 0;
5210}
5211catch(std::exception &e) {
e6a9dde5 5212 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
5213 return 0;
5214}
5215catch(...) {
e6a9dde5 5216 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
5217 return 0;
5218}
5219
51e2144e 5220
3ddb9247 5221int main(int argc, char **argv)
288f4aa9 5222{
dbd23fc2
BH
5223 g_argc = argc;
5224 g_argv = argv;
5e3de507 5225 g_stats.startupTime=time(0);
b51ef4f9 5226 Utility::srandom();
3e135495 5227 versionSetProduct(ProductRecursor);
8a63d3ce 5228 reportBasicTypes();
0007c2e5 5229 reportOtherTypes();
ea634573 5230
22030c37 5231 int ret = EXIT_SUCCESS;
caa6eefa 5232
288f4aa9 5233 try {
f888311c 5234 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 5235 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 5236 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 5237 ::arg().set("local-port","port to listen on")="53";
32252594 5238 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 5239 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 5240 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 5241 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 5242 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
13c46e62 5243 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
d3f809bf 5244 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 5245 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 5246 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 5247 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 5248 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 5249 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19 5250 ::arg().set("chroot","switch to chroot jail")="";
fe9e7b8d
PL
5251 ::arg().set("setgid","If set, change group id to this gid for more security"
5252#ifdef HAVE_SYSTEMD
5253#define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
5254 SYSTEMD_SETID_MSG
5255#endif
5256 )="";
5257 ::arg().set("setuid","If set, change user id to this uid for more security"
5258#ifdef HAVE_SYSTEMD
5259 SYSTEMD_SETID_MSG
5260#endif
5261 )="";
c83ee49d 5262 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 5263 ::arg().set("threads", "Launch this number of threads")="2";
b243ca3b 5264 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
adabfcb9 5265 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 5266 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 5267 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976 5268 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
479e0976 5269 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
5270 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
5271 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
5272 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 5273 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
8ca656a8 5274 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
cc08b5a9 5275 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 5276 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 5277 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
f7a645ec
RG
5278 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
5279 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
5280
0ec489bf 5281 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 5282 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 5283 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 5284 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
5285 ::arg().set("socket-owner","Owner of socket")="";
5286 ::arg().set("socket-group","Group of socket")="";
5287 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 5288
9a5b0a54
PL
5289 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+"/pdns-recursor when unset and not chrooted"
5290#ifdef HAVE_SYSTEMD
5291 + ". Set to the RUNTIME_DIRECTORY environment variable when that variable has a value (e.g. under systemd).")="";
5292 auto runtimeDir = getenv("RUNTIME_DIRECTORY");
5293 if (runtimeDir != nullptr) {
5294 ::arg().set("socket-dir") = runtimeDir;
5295 }
5296#else
5297 )="";
5298#endif
2e3d8a19
BH
5299 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
5300 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
5301 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 5302 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 5303 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
87ff2287 5304 ::arg().set("max-concurrent-requests-per-tcp-connection", "Maximum number of requests handled concurrently per TCP connection") = "10";
324dc148 5305 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 5306 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
559b6c93
PL
5307 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
5308 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
2e3d8a19 5309 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 5310 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 5311 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
b9473937 5312 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
c3e753c7 5313 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 5314 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 5315 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 5316 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
950626be 5317 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
92011b8f 5318 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 5319 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 5320 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 5321 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 5322 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 5323 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 5324 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 5325 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 5326 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 5327 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 5328 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 5329 ::arg().set("lua-config-file", "More powerful configuration options")="";
0273d4ab 5330 ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
644dd1da 5331
5605c067 5332 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
5333 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
5334 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 5335 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 5336 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 5337 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 5338 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 5339 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
a2f87dd1 5340 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
08f3f638 5341 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 5342 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 5343 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
fd8898fb 5344 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
35695d18 5345 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
fd8898fb 5346 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5cf4b2e7 5347 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
ed9019c9 5348 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
3f975863 5349 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 5350 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 5351 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 5352 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 5353 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 5354 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 5355 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 5356 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 5357 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
54c36063
PL
5358 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
5359 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
aadceba8 5360 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
409b8398 5361 ::arg().set("max-qperq", "Maximum outgoing queries per query")="60";
86f95f85 5362 ::arg().set("max-ns-address-qperq", "Maximum outgoing NS address queries per query")="10";
c5950146 5363 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 5364 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 5365 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
c29d820c 5366 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
ee271fc4 5367 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
a09a8ce0 5368
68e6df3c 5369 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 5370 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 5371
5372 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 5373
d705aad9 5374 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 5375 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 5376
72259676
RG
5377 std::string defaultBlacklistedStats = "cache-bytes, packetcache-bytes, special-memory-usage";
5378 for (size_t idx = 0; idx < 32; idx++) {
5379 defaultBlacklistedStats += ", ecs-v4-response-bits-" + std::to_string(idx + 1);
5380 }
5381 for (size_t idx = 0; idx < 128; idx++) {
5382 defaultBlacklistedStats += ", ecs-v6-response-bits-" + std::to_string(idx + 1);
5383 }
563517f3
RG
5384 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats;
5385 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats;
5386 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats;
5387 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats;
d705aad9 5388
0735b17e 5389 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 5390 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 5391
8fd25133
RG
5392 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
5393
98d36505
RG
5394 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
5395
5cc8371b 5396 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 5397 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 5398
5216ddcc
RG
5399 ::arg().set("proxy-protocol-from", "A Proxy Protocol header is only allowed from these subnets")="";
5400 ::arg().set("proxy-protocol-maximum-size", "The maximum size of a proxy protocol payload, including the TLV values")="512";
5401
ef3ee606
RG
5402 ::arg().set("dns64-prefix", "DNS64 prefix")="";
5403
58da9034 5404 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
5405 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
5406 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 5407 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
d6f3fcfa 5408 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
144040be 5409 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
8949a3e0
OM
5410
5411 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="yes";
d40a915b 5412 ::arg().setSwitch("nothing-below-nxdomain", "When an NXDOMAIN exists in cache for a name with fewer labels than the qname, send NXDOMAIN without doing a lookup (see RFC 8020)")="dnssec";
ba3d53d1 5413 ::arg().set("max-generate-steps", "Maximum number of $GENERATE steps when loading a zone from a file")="0";
fb2c9ec8 5414 ::arg().set("record-cache-shards", "Number of shards in the record cache")="1024";
d40a915b 5415
af1377b7
NC
5416#ifdef NOD_ENABLED
5417 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
5418 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
5419 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
5420 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod";
5421 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
b78727c6 5422 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 5423 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
41c542ec
NC
5424 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
5425 ::arg().set("unique-response-log", "Log unique responses")="yes";
5426 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR)+"/udr";
b78727c6 5427 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
ca2526f5 5428 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
af1377b7 5429#endif /* NOD_ENABLED */
94e2a9b0 5430
699bc10a 5431 ::arg().setSwitch("extended-resolution-errors", "If set, send an EDNS Extended Error extension on resolution failures, like DNSSEC validation errors")="no";
94e2a9b0 5432
2e3d8a19 5433 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 5434 ::arg().setCmd("version","Print version string");
d5141417 5435 ::arg().setCmd("config","Output blank configuration");
8864bdf6 5436 ::arg().setDefaults();
e6a9dde5 5437 g_log.toConsole(Logger::Info);
2e3d8a19 5438 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 5439
2733183f
PL
5440 if(::arg().mustDo("version")) {
5441 showProductVersion();
5442 showBuildConfiguration();
5443 exit(0);
5444 }
5445
2d733c0f
CH
5446 string configname=::arg()["config-dir"]+"/recursor.conf";
5447 if(::arg()["config-name"]!="") {
5448 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 5449 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
5450 }
5451 cleanSlashes(configname);
5124de27 5452
5cc1ea1d 5453 if(!::arg().getCommands().empty()) {
94ea3c7b
PL
5454 cerr<<"Fatal: non-option";
5455 if (::arg().getCommands().size() > 1) {
5456 cerr<<"s";
5457 }
5458 cerr<<" (";
5459 bool first = true;
7af99dff 5460 for (const auto& c : ::arg().getCommands()) {
94ea3c7b
PL
5461 if (!first) {
5462 cerr<<", ";
5463 }
5464 first = false;
5465 cerr<<c;
5466 }
5467 cerr<<") on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
5cc1ea1d
CH
5468 exit(99);
5469 }
5470
577cf284 5471 if(::arg().mustDo("config")) {
8864bdf6 5472 cout<<::arg().configstring(false, true);
577cf284
BH
5473 exit(0);
5474 }
5475
3ddb9247 5476 if(!::arg().file(configname.c_str()))
e6a9dde5 5477 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 5478
2e3d8a19 5479 ::arg().parse(argc,argv);
c836dc19 5480
2054afbb
CH
5481 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
5482 g_log<<Logger::Error<<"Using chroot and enabling the API is not possible"<<endl;
f0f3f0b0
PL
5483 exit(EXIT_FAILURE);
5484 }
5485
5486 if (::arg()["socket-dir"].empty()) {
5487 if (::arg()["chroot"].empty())
0524add9 5488 ::arg().set("socket-dir") = std::string(LOCALSTATEDIR) + "/pdns-recursor";
f0f3f0b0
PL
5489 else
5490 ::arg().set("socket-dir") = "/";
5491 }
5492
2e3d8a19 5493 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 5494
b243ca3b
RG
5495 if(::arg().asNum("threads")==1) {
5496 if (::arg().mustDo("pdns-distributes-queries")) {
5497 g_log<<Logger::Warning<<"Only one thread, no need to distribute queries ourselves"<<endl;
5498 ::arg().set("pdns-distributes-queries")="no";
5499 }
5500 }
5501
5502 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
5503 g_log<<Logger::Warning<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl;
5504 ::arg().set("distributor-threads")="1";
5505 }
5506
5507 if (!::arg().mustDo("pdns-distributes-queries")) {
5508 ::arg().set("distributor-threads")="0";
5509 }
61d74169 5510
2e3d8a19 5511 if(::arg().mustDo("help")) {
ff5ba4f9
WA
5512 cout<<"syntax:"<<endl<<endl;
5513 cout<<::arg().helpstring(::arg()["help"])<<endl;
5514 exit(0);
b636533b 5515 }
ccfadb6c
OM
5516 g_recCache = std::unique_ptr<MemRecursorCache>(new MemRecursorCache(::arg().asNum("record-cache-shards")));
5517 g_negCache = std::unique_ptr<NegCache>(new NegCache(::arg().asNum("record-cache-shards")));
359d6c17 5518
34162f8f 5519 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 5520
34162f8f
CH
5521 if (logUrgency < Logger::Error)
5522 logUrgency = Logger::Error;
f48d7b65 5523 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
5524 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
5525 }
e6a9dde5
PL
5526 g_log.setLoglevel(logUrgency);
5527 g_log.toConsole(logUrgency);
34162f8f 5528
bff61896 5529 ret = serviceMain(argc, argv);
288f4aa9 5530 }
3f81d239 5531 catch(PDNSException &ae) {
e6a9dde5 5532 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 5533 ret=EXIT_FAILURE;
288f4aa9 5534 }
fdbf35ac 5535 catch(std::exception &e) {
e6a9dde5 5536 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 5537 ret=EXIT_FAILURE;
288f4aa9
BH
5538 }
5539 catch(...) {
e6a9dde5 5540 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 5541 ret=EXIT_FAILURE;
288f4aa9 5542 }
3ddb9247 5543
22030c37 5544 return ret;
288f4aa9 5545}