]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Merge pull request #5543 from rgacogne/web-auto-ptr
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
fa8fd4d2 29
2470b36e 30#include "ws-recursor.hh"
49a699c4 31#include <pthread.h>
3ea54bf0 32#include "recpacketcache.hh"
3ddb9247 33#include "utility.hh"
51e2144e 34#include "dns_random.hh"
d1b28475
KM
35#ifdef HAVE_LIBSODIUM
36#include <sodium.h>
37#endif
3afde9b2 38#include "opensslsigners.hh"
288f4aa9
BH
39#include <iostream>
40#include <errno.h>
81859ba5 41#include <boost/static_assert.hpp>
288f4aa9
BH
42#include <map>
43#include <set>
97bb160b 44#include "recursor_cache.hh"
38c9ceaa 45#include "cachecleaner.hh"
288f4aa9 46#include <stdio.h>
c75a6a9e 47#include <signal.h>
288f4aa9 48#include <stdlib.h>
bb4bdbaf 49#include "misc.hh"
288f4aa9
BH
50#include "mtasker.hh"
51#include <utility>
288f4aa9
BH
52#include "arguments.hh"
53#include "syncres.hh"
88def049
BH
54#include <fcntl.h>
55#include <fstream>
3e61e7f7 56#include "sortlist.hh"
57extern SortList g_sortlist;
5c633640
BH
58#include "sstuff.hh"
59#include <boost/tuple/tuple.hpp>
60#include <boost/tuple/tuple_comparison.hpp>
72df400f 61#include <boost/shared_array.hpp>
7f1fa77d 62#include <boost/function.hpp>
5605c067 63#include <boost/algorithm/string.hpp>
8f7473d7 64#ifdef MALLOC_TRACE
65#include "malloctrace.hh"
66#endif
40a3dd64 67#include <netinet/tcp.h>
ea634573
BH
68#include "dnsparser.hh"
69#include "dnswriter.hh"
70#include "dnsrecords.hh"
f814d7c8 71#include "zoneparser-tng.hh"
1d5b3ce6 72#include "rec_channel.hh"
aaacf7f2 73#include "logger.hh"
c8ddb7c2 74#include "iputils.hh"
09e6702a 75#include "mplexer.hh"
c038218b 76#include "config.h"
808c5ef7 77#include "lua-recursor4.hh"
ba1a571d 78#include "version.hh"
79332bff 79#include "responsestats.hh"
d67620e4 80#include "secpoll-recursor.hh"
c5c066bf 81#include "dnsname.hh"
644dd1da 82#include "filterpo.hh"
83#include "rpzloader.hh"
b3f0ed10 84#include "validate-recursor.hh"
f3c18728 85#include "rec-lua-conf.hh"
5c3b5e7f 86#include "ednsoptions.hh"
85c7ca75 87#include "gettime.hh"
f3c18728 88
d9d3f9c1 89#include "rec-protobuf.hh"
d705aad9 90#include "rec-snmp.hh"
aa7929a3 91
6b6720de
PL
92#ifdef HAVE_SYSTEMD
93#include <systemd/sd-daemon.h>
94#endif
95
d187038c
RG
96#include "namespaces.hh"
97
98typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
99
f26bf547
RG
100static thread_local std::shared_ptr<RecursorLua4> t_pdl;
101static thread_local unsigned int t_id;
102static thread_local std::shared_ptr<Regex> t_traceRegex;
103static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
104
105thread_local std::unique_ptr<MT_t> MT; // the big MTasker
106thread_local std::unique_ptr<MemRecursorCache> t_RC;
107thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 108thread_local FDMultiplexer* t_fdm{nullptr};
f26bf547
RG
109thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
110thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
111thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
aa7929a3 112#ifdef HAVE_PROTOBUF
f26bf547 113thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
aa7929a3 114#endif
d187038c 115__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798
BH
116
// for communicating with our threads
// NOTE(review): the members look like the two ends of two pipes (one towards
// the thread, one back from it) — confirm against the code that fills g_pipes.
struct ThreadPipeSet
{
  int writeToThread;
  int readToThread;
  int writeFromThread;
  int readFromThread;
};
810ff705 125
d187038c
RG
126typedef vector<int> tcpListenSockets_t;
127typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
128typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
3ea54bf0 129
d187038c
RG
130static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
131static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
132static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
133static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
810ff705 134static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
d187038c
RG
135static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
136static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
137static AtomicCounter counter;
9065eb05 138static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 139static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
d187038c
RG
140static size_t g_tcpMaxQueriesPerConn;
141static uint64_t g_latencyStatSize;
142static uint32_t g_disthashseed;
143static unsigned int g_maxTCPPerClient;
144static unsigned int g_networkTimeoutMsec;
145static unsigned int g_maxMThreads;
146static unsigned int g_numWorkerThreads;
147static int g_tcpTimeout;
148static uint16_t g_udpTruncationThreshold;
149static std::atomic<bool> statsWanted;
150static std::atomic<bool> g_quiet;
151static bool g_logCommonErrors;
152static bool g_anyToTcp;
153static bool g_lowercaseOutgoing;
154static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
810ff705
RG
155static bool g_reusePort{false};
156static bool g_useOneSocketPerThread;
00b8cadc 157static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 158static time_t g_statisticsInterval;
9065eb05 159static bool g_useIncomingECS;
49a699c4 160
d187038c
RG
161RecursorControlChannel s_rcc; // only active in thread 0
162RecursorStats g_stats;
2d733c0f 163string s_programname="pdns_recursor";
d187038c
RG
164string s_pidfname;
165unsigned int g_numThreads;
166uint16_t g_outgoingEDNSBufsize;
c3828c03 167
// Local/private networks, IPv4 first and then IPv6.
#define LOCAL_NETS \
  "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, " \
  "::1/128, fc00::/7, fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS \
  "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, " \
  "::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Concatenation of the two lists above.
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 176
d7dae798 177//! used to send information to a newborn mthread
ea634573 178struct DNSComboWriter {
27c0050c 179 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
232f0877 180 d_tcp(false), d_socket(-1)
ea634573
BH
181 {}
182 MOADNSParser d_mdp;
00c9b8c1 183 void setRemote(const ComboAddress* sa)
ea634573 184 {
37d3f960 185 d_remote=*sa;
ea634573
BH
186 }
187
b71b60ee 188 void setLocal(const ComboAddress& sa)
189 {
190 d_local=sa;
191 }
192
193
ea634573
BH
194 void setSocket(int sock)
195 {
196 d_socket=sock;
197 }
a1754c6a
BH
198
199 string getRemote() const
200 {
37d3f960 201 return d_remote.toString();
a1754c6a
BH
202 }
203
c9e9e5e0 204 struct timeval d_now;
b71b60ee 205 ComboAddress d_remote, d_local;
aa7929a3
RG
206#ifdef HAVE_PROTOBUF
207 boost::uuids::uuid d_uuid;
67e31ebe 208 string d_requestorId;
aa7929a3 209#endif
b40562da
RG
210 EDNSSubnetOpts d_ednssubnet;
211 bool d_ecsFound{false};
212 bool d_ecsParsed{false};
ea634573
BH
213 bool d_tcp;
214 int d_socket;
b673817a 215 unsigned int d_tag{0};
e9f63d47 216 uint32_t d_qhash{0};
49a3500d 217 string d_query;
cd989c87 218 shared_ptr<TCPConnection> d_tcpConnection;
e8340d27 219 vector<pair<uint16_t, string> > d_ednsOpts;
02b47f43 220 std::vector<std::string> d_policyTags;
5fd2577f 221 LuaContext::LuaObject d_data;
ea634573
BH
222};
223
06857845
RG
224MT_t* getMT()
225{
226 return MT ? MT.get() : nullptr;
227}
ea634573 228
288f4aa9
BH
229ArgvMap &arg()
230{
231 static ArgvMap theArg;
232 return theArg;
233}
4ef015cd 234
b4015453
RG
235unsigned int getRecursorThreadId()
236{
237 return t_id;
238}
09e6702a 239
30ee601a
RG
240int getMTaskerTID()
241{
242 return MT->getTid();
243}
244
d187038c 245static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 246
50c81227 247// -1 is error, 0 is timeout, 1 is success
3ddb9247 248int asendtcp(const string& data, Socket* sock)
5c633640
BH
249{
250 PacketID pident;
251 pident.sock=sock;
252 pident.outMSG=data;
3ddb9247 253
bb4bdbaf 254 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 255 string packet;
5c633640 256
5b0ddd18 257 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 258
9170fbaf 259 if(!ret || ret==-1) { // timeout
bb4bdbaf 260 t_fdm->removeWriteFD(sock->getHandle());
5c633640 261 }
50c81227
BH
262 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
263 return -1;
264 }
9170fbaf 265 return ret;
5c633640
BH
266}
267
d187038c 268static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 269
9170fbaf 270// -1 is error, 0 is timeout, 1 is success
a683e8bd 271int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 272{
50c81227 273 data.clear();
5c633640
BH
274 PacketID pident;
275 pident.sock=sock;
276 pident.inNeeded=len;
825fa717 277 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 278 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 279
bb4bdbaf 280 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 281 if(!ret || ret==-1) { // timeout
bb4bdbaf 282 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 283 }
50c81227
BH
284 else if(data.empty()) {// error, EOF or other
285 return -1;
286 }
287
9170fbaf 288 return ret;
288f4aa9
BH
289}
290
d187038c 291static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 292{
fba1e944 293 PacketID pident=*any_cast<PacketID>(&var);
4465e941 294 char resp[512];
a683e8bd 295 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
4465e941 296 t_fdm->removeReadFD(fd);
297 if(ret >= 0) {
a683e8bd 298 string data(resp, (size_t) ret);
fba1e944 299 MT->sendEvent(pident, &data);
4465e941 300 }
301 else {
fba1e944 302 string empty;
303 MT->sendEvent(pident, &empty);
304 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 305 }
306}
fba1e944 307string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 308{
4465e941 309 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
310 s.setNonBlocking();
311 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
312
313 s.bind(local);
314 s.connect(dest);
4465e941 315 s.send(query);
316
317 PacketID pident;
318 pident.sock=&s;
319 pident.type=0;
fba1e944 320 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 321
322 string data;
fba1e944 323
4465e941 324 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 325
4465e941 326 if(!ret || ret==-1) { // timeout
4465e941 327 t_fdm->removeReadFD(s.getHandle());
328 }
329 else if(data.empty()) {// error, EOF or other
fba1e944 330 // we could special case this
4465e941 331 return data;
332 }
4465e941 333 return data;
334}
335
d7dae798 336//! pick a random query local address
1652a63e 337ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 338{
1652a63e 339 ComboAddress ret;
5a38281c 340 if(family==AF_INET) {
3ddb9247 341 if(g_localQueryAddresses4.empty())
1652a63e 342 ret = g_local4;
3ddb9247 343 else
1652a63e
BH
344 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
345 ret.sin4.sin_port = htons(port);
5a38281c
BH
346 }
347 else {
348 if(g_localQueryAddresses6.empty())
1652a63e
BH
349 ret = g_local6;
350 else
351 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 352
1652a63e 353 ret.sin6.sin6_port = htons(port);
5a38281c 354 }
1652a63e 355 return ret;
5a38281c 356}
4ef015cd 357
d187038c 358static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 359
d187038c 360static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
361{
362 uint32_t psize=0;
363 socklen_t len=sizeof(psize);
3ddb9247 364
d7dae798
BH
365 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
366 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 367 return;
d7dae798
BH
368 }
369
370 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
c057bfaa 371 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
372}
373
374
375static void setSocketReceiveBuffer(int fd, uint32_t size)
376{
377 setSocketBuffer(fd, SO_RCVBUF, size);
378}
379
380static void setSocketSendBuffer(int fd, uint32_t size)
381{
382 setSocketBuffer(fd, SO_SNDBUF, size);
383}
384
385
4ef015cd
BH
386// you can ask this class for a UDP socket to send a query from
387// this socket is not yours, don't even think about deleting it
388// but after you call 'returnSocket' on it, don't assume anything anymore
389class UDPClientSocks
390{
4ef015cd 391 unsigned int d_numsocks;
4ef015cd 392public:
e2642526 393 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
394 {
395 }
396
996c89cc 397 typedef set<int> socks_t;
4ef015cd
BH
398 socks_t d_socks;
399
2ee280cf 400 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 401 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 402 {
d8f6d49f
BH
403 *fd=makeClientSocket(toaddr.sin4.sin_family);
404 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 405 return -2;
d8f6d49f
BH
406
407 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
408 int err = errno;
41ff43f8 409 // returnSocket(*fd);
a7b68ae7
RG
410 try {
411 closesocket(*fd);
412 }
413 catch(const PDNSException& e) {
414 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
415 }
416
d8f6d49f 417 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 418 return -2;
998a4334 419 return -1;
d8f6d49f 420 }
998a4334 421
d8f6d49f 422 d_socks.insert(*fd);
998a4334 423 d_numsocks++;
d8f6d49f 424 return 0;
4ef015cd
BH
425 }
426
095c3045
BH
427 void returnSocket(int fd)
428 {
429 socks_t::iterator i=d_socks.find(fd);
34801ab1 430 if(i==d_socks.end()) {
335da0ba 431 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 432 }
bb4bdbaf 433 returnSocketLocked(i);
095c3045
BH
434 }
435
4ef015cd 436 // return a socket to the pool, or simply erase it
bb4bdbaf 437 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 438 {
600fc20b 439 if(i==d_socks.end()) {
3f81d239 440 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 441 }
80baf329 442 try {
bb4bdbaf 443 t_fdm->removeReadFD(*i);
80baf329
BH
444 }
445 catch(FDMultiplexerException& e) {
bb4bdbaf 446 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 447 }
a7b68ae7
RG
448 try {
449 closesocket(*i);
450 }
451 catch(const PDNSException& e) {
452 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
453 }
3ddb9247 454
998a4334
BH
455 d_socks.erase(i++);
456 --d_numsocks;
4ef015cd 457 }
d8f6d49f
BH
458
459 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 460 static int makeClientSocket(int family)
d8f6d49f 461 {
a683e8bd 462 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 463
d8f6d49f
BH
464 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
465 return ret;
3ddb9247
PD
466
467 if(ret<0)
335da0ba 468 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 469
7eb73ffa 470 // setCloseOnExec(ret); // we're not going to exec
5a38281c 471
d8f6d49f 472 int tries=10;
3aa91c3e 473 ComboAddress sin;
d8f6d49f 474 while(--tries) {
1652a63e 475 uint16_t port;
3ddb9247 476
d8f6d49f 477 if(tries==1) // fall back to kernel 'random'
4957a608 478 port = 0;
1652a63e
BH
479 else
480 port = 1025 + dns_random(64510);
5a38281c 481
3aa91c3e 482 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 483
3ddb9247 484 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 485 break;
d8f6d49f
BH
486 }
487 if(!tries)
3aa91c3e 488 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 489
3897b9e1 490 setNonBlocking(ret);
d8f6d49f
BH
491 return ret;
492 }
49a699c4
BH
493};
494
f26bf547 495static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 496
288f4aa9 497/* these two functions are used by LWRes */
34801ab1 498// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 499int asendto(const char *data, size_t len, int flags,
3ddb9247 500 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 501{
34801ab1
BH
502
503 PacketID pident;
787e5eab
BH
504 pident.domain = domain;
505 pident.remote = toaddr;
506 pident.type = qtype;
34801ab1
BH
507
508 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
509 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
510
511 for(; chain.first != chain.second; chain.first++) {
512 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 513 /*
4665c31e
BH
514 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
515 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 516 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 517 */
34801ab1
BH
518 chain.first->key.chain.insert(id); // we can chain
519 *fd=-1; // gets used in waitEvent / sendEvent later on
520 return 1;
521 }
522 }
523
49a699c4 524 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
525 if(ret < 0)
526 return ret;
34801ab1 527
998a4334
BH
528 pident.fd=*fd;
529 pident.id=id;
3ddb9247 530
bb4bdbaf
BH
531 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
532 ret = send(*fd, data, len, 0);
533
5b0ddd18 534 int tmp = errno;
bb4bdbaf 535
7302ed0a 536 if(ret < 0)
49a699c4 537 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 538
5b0ddd18 539 errno = tmp; // this is for logging purposes only
7302ed0a 540 return ret;
288f4aa9
BH
541}
542
9170fbaf 543// -1 is error, 0 is timeout, 1 is success
a683e8bd 544int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 545 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 546{
0d5f0a9f 547 static optional<unsigned int> nearMissLimit;
3ddb9247 548 if(!nearMissLimit)
0d5f0a9f
BH
549 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
550
288f4aa9 551 PacketID pident;
4ef015cd 552 pident.fd=fd;
288f4aa9 553 pident.id=id;
0d5f0a9f 554 pident.domain=domain;
787e5eab 555 pident.type = qtype;
996c89cc 556 pident.remote=fromaddr;
b636533b 557
288f4aa9 558 string packet;
5b0ddd18 559 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 560
9170fbaf 561 if(ret > 0) {
996c89cc 562 if(packet.empty()) // means "error"
3ddb9247 563 return -1;
998a4334 564
a683e8bd 565 *d_len=packet.size();
9170fbaf 566 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 567 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 568 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 569 g_stats.spoofCount++;
35ce8576
BH
570 return -1;
571 }
288f4aa9 572 }
09e6702a 573 else {
34801ab1 574 if(fd >= 0)
49a699c4 575 t_udpclientsocks->returnSocket(fd);
09e6702a 576 }
9170fbaf 577 return ret;
288f4aa9
BH
578}
579
88def049
BH
580static void writePid(void)
581{
191f2e47 582 if(!::arg().mustDo("write-pid"))
583 return;
18e7758c 584 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 585 if(of)
705f31ae 586 of<< Utility::getpid() <<endl;
88def049 587 else
c057bfaa 588 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
589}
590
cd989c87 591TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
592{
593 ++s_currentConnections;
cd989c87 594 (*t_tcpClientCounts)[d_remote]++;
0e408828 595}
cd989c87
BH
596
597TCPConnection::~TCPConnection()
0e408828 598{
a7b68ae7
RG
599 try {
600 if(closesocket(d_fd) < 0)
601 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
602 }
603 catch(const PDNSException& e) {
604 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
605 }
606
3ddb9247 607 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 608 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 609 --s_currentConnections;
0e408828 610}
0e9d9ce2 611
3ddb9247 612AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
613
614static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 615
92011b8f 616// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 617static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 618{
92011b8f 619 if(packetsize > 1000 && t_largeanswerremotes)
620 t_largeanswerremotes->push_back(remote);
2cc13433
BH
621 switch(res) {
622 case RCode::ServFail:
92011b8f 623 if(t_servfailremotes) {
624 t_servfailremotes->push_back(remote);
5af86fdc 625 if(query && t_servfailqueryring) // packet cache
92011b8f 626 t_servfailqueryring->push_back(make_pair(*query, qtype));
627 }
2cc13433
BH
628 g_stats.servFails++;
629 break;
630 case RCode::NXDomain:
631 g_stats.nxDomains++;
632 break;
633 case RCode::NoError:
634 g_stats.noErrors++;
635 break;
636 }
637}
638
a903b39c 639static string makeLoginfo(DNSComboWriter* dc)
640try
641{
5ad5bb7d 642 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
a903b39c 643}
644catch(...)
645{
646 return "Exception making error message for exception";
647}
648
aa7929a3 649#ifdef HAVE_PROTOBUF
67e31ebe 650static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId)
aa7929a3 651{
e1c8a4bb
RG
652 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
653 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
654 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
a94bc5d7 655 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 656 message.setRequestorId(requestorId);
02b47f43 657
02b47f43 658 if (!policyTags.empty()) {
d9d3f9c1 659 message.setPolicyTags(policyTags);
02b47f43 660 }
aa7929a3 661
d9d3f9c1 662// cerr <<message.toDebugString()<<endl;
aa7929a3 663 std::string str;
d9d3f9c1 664 message.serialize(str);
aa7929a3 665 logger->queueData(str);
aa7929a3
RG
666}
667
d9d3f9c1 668static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
aa7929a3 669{
d9d3f9c1 670// cerr <<message.toDebugString()<<endl;
aa7929a3 671 std::string str;
d9d3f9c1 672 message.serialize(str);
aa7929a3 673 logger->queueData(str);
aa7929a3
RG
674}
675#endif
676
53508135
PL
677/**
678 * Chases the CNAME provided by the PolicyCustom RPZ policy.
679 *
680 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
681 * @param qtype: The QType of the original query
682 * @param sr: A SyncRes
683 * @param res: An integer that will contain the RCODE of the lookup we do
684 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
685 */
d187038c 686static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
687{
688 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
689 bool oldWantsRPZ = sr.getWantsRPZ();
690 sr.setWantsRPZ(false);
53508135
PL
691 vector<DNSRecord> ans;
692 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
693 for (const auto& rec : ans) {
694 if(rec.d_place == DNSResourceRecord::ANSWER) {
695 ret.push_back(rec);
696 }
697 }
698 // Reset the RPZ state of the SyncRes
30ee601a 699 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
700 }
701}
702
d187038c 703static void startDoResolve(void *p)
288f4aa9 704{
7b1469bb 705 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 706 try {
5af86fdc
RG
707 if (t_queryring)
708 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 709
32015748 710 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 711 EDNSOpts edo;
8e079f3a 712 bool haveEDNS=false;
713 if(getEDNSOpts(dc->d_mdp, &edo)) {
32015748
RG
714 if(!dc->d_tcp) {
715 /* rfc6891 6.2.3:
716 "Values lower than 512 MUST be treated as equal to 512."
717 */
718 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
719 }
e8340d27 720 dc->d_ednsOpts = edo.d_options;
8e079f3a 721 haveEDNS=true;
b40562da
RG
722
723 if (g_useIncomingECS && !dc->d_ecsParsed) {
724 for (const auto& o : edo.d_options) {
725 if (o.first == EDNSOptionCode::ECS) {
726 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
727 break;
728 }
729 }
730 }
10321a98 731 }
b40562da
RG
732 /* perhaps there was no EDNS or no ECS but by now we looked */
733 dc->d_ecsParsed = true;
e325f20c 734 vector<DNSRecord> ret;
ea634573 735 vector<uint8_t> packet;
b23b8614 736
ad42489c 737 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
738 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
739 bool wantsRPZ(true);
d9d3f9c1 740 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
aa7929a3 741#ifdef HAVE_PROTOBUF
d9d3f9c1 742 if (luaconfsLocal->protobufServer) {
e1c8a4bb
RG
743 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
744 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
745 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
b40562da 746 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
747 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
748 }
749#endif /* HAVE_PROTOBUF */
ad42489c 750
3ddb9247 751 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
752
753 pw.getHeader()->aa=0;
754 pw.getHeader()->ra=1;
c154c8a4 755 pw.getHeader()->qr=1;
bb4bdbaf 756 pw.getHeader()->tc=0;
ea634573 757 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 758 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 759 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 760
1059837e 761 // DO NOT MOVE THIS CODE UP - DNSPacketWriter needs to get the original-cased version
b3adda56 762 if (g_lowercaseOutgoing)
3ebc80ce 763 dc->d_mdp.d_qname = dc->d_mdp.d_qname.makeLowerCase();
b3adda56 764
904d3219
PD
765 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
766
767 SyncRes sr(dc->d_now);
2e921ec6 768 bool DNSSECOK=false;
3457a2a0 769 if(t_pdl) {
f26bf547 770 sr.setLuaEngine(t_pdl);
3457a2a0 771 }
cd00142f 772 sr.d_requestor=dc->d_remote; // ECS needs this too
9eec8c98 773 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 774 sr.setDoDNSSEC(true);
9eec8c98
PL
775
776 // Does the requestor want DNSSEC records?
777 if(edo.d_Z & EDNSOpts::DNSSECOK) {
778 DNSSECOK=true;
779 g_stats.dnssecQueries++;
780 }
781 } else {
782 // Ignore the client-set CD flag
783 pw.getHeader()->cd=0;
5b9853c9 784 }
4898a348 785#ifdef HAVE_PROTOBUF
30ee601a 786 sr.setInitialRequestId(dc->d_uuid);
4898a348 787#endif
b40562da 788 if (g_useIncomingECS) {
30ee601a 789 sr.setIncomingECSFound(dc->d_ecsFound);
b40562da 790 if (dc->d_ecsFound) {
30ee601a 791 sr.setIncomingECS(dc->d_ednssubnet);
b40562da
RG
792 }
793 }
57769f13 794
904d3219
PD
795 bool tracedQuery=false; // we could consider letting Lua know about this too
796 bool variableAnswer = false;
9fc36e90 797 bool shouldNotValidate = false;
904d3219 798
ef3b6cd7
RG
799 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
800 int res = RCode::NoError;
1f1ca368 801 DNSFilterEngine::Policy appliedPolicy;
39ec5d29 802 DNSRecord spoofed;
6e505c5e
RG
803 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
804 dq.ednsFlags = &edo.d_Z;
805 dq.ednsOptions = &dc->d_ednsOpts;
806 dq.tag = dc->d_tag;
807 dq.discardedPolicies = &sr.d_discardedPolicies;
808 dq.policyTags = &dc->d_policyTags;
809 dq.appliedPolicy = &appliedPolicy;
810 dq.currentRecords = &ret;
811 dq.dh = &dc->d_mdp.d_header;
05c74122 812 dq.data = dc->d_data;
67e31ebe
RG
813#ifdef HAVE_PROTOBUF
814 dq.requestorId = dc->d_requestorId;
815#endif
ba21fcfe 816
e661a20b 817 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
818 pw.getHeader()->tc = 1;
819 res = 0;
820 variableAnswer = true;
e661a20b
PD
821 goto sendit;
822 }
823
f26bf547 824 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
825 sr.setLogMode(SyncRes::Store);
826 tracedQuery=true;
827 }
3ddb9247 828
8f7473d7 829
976ec823 830 if(!g_quiet || tracedQuery) {
461df9d2 831 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 832 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da
RG
833 if(!dc->d_ednssubnet.source.empty()) {
834 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 835 }
976ec823 836 L<<endl;
837 }
c75a6a9e 838
fededf47 839 sr.setId(MT->getTid());
67828389 840 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
841 sr.setCacheOnly();
842
f26bf547
RG
843 if (t_pdl) {
844 t_pdl->prerpz(dq, res);
0a273054
RG
845 }
846
db486de5 847 // Check if the query has a policy attached to it
0a273054 848 if (wantsRPZ) {
1f1ca368 849 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
0a273054 850 }
644dd1da 851
54be222b 852 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 853 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 854
30ee601a 855 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
856 if(wantsRPZ) {
857 switch(appliedPolicy.d_kind) {
858 case DNSFilterEngine::PolicyKind::NoAction:
859 break;
860 case DNSFilterEngine::PolicyKind::Drop:
861 g_stats.policyDrops++;
7a25883a 862 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
863 delete dc;
864 dc=0;
865 return;
866 case DNSFilterEngine::PolicyKind::NXDOMAIN:
867 g_stats.policyResults[appliedPolicy.d_kind]++;
868 res=RCode::NXDomain;
869 goto haveAnswer;
870 case DNSFilterEngine::PolicyKind::NODATA:
871 g_stats.policyResults[appliedPolicy.d_kind]++;
872 res=RCode::NoError;
db486de5 873 goto haveAnswer;
b8470add
PL
874 case DNSFilterEngine::PolicyKind::Custom:
875 g_stats.policyResults[appliedPolicy.d_kind]++;
876 res=RCode::NoError;
a9e029ee 877 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 878 ret.push_back(spoofed);
53508135 879 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
880 goto haveAnswer;
881 case DNSFilterEngine::PolicyKind::Truncate:
882 if(!dc->d_tcp) {
883 g_stats.policyResults[appliedPolicy.d_kind]++;
884 res=RCode::NoError;
885 pw.getHeader()->tc=1;
886 goto haveAnswer;
887 }
888 break;
889 }
db486de5
PL
890 }
891
b8470add 892 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
893 try {
894 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 895 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
896 }
897 catch(ImmediateServFailException &e) {
854d44e3 898 if(g_logCommonErrors)
899 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
900 res = RCode::ServFail;
901 }
4485aa35 902
b8470add
PL
903 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
904 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
905 appliedPolicy = sr.d_appliedPolicy;
906 g_stats.policyResults[appliedPolicy.d_kind]++;
907 switch(appliedPolicy.d_kind) {
908 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
909 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
910 case DNSFilterEngine::PolicyKind::Drop:
911 g_stats.policyDrops++;
912 delete dc;
913 dc=0;
914 return;
915 case DNSFilterEngine::PolicyKind::NXDOMAIN:
916 ret.clear();
917 res=RCode::NXDomain;
918 goto haveAnswer;
919
920 case DNSFilterEngine::PolicyKind::NODATA:
921 ret.clear();
922 res=RCode::NoError;
923 goto haveAnswer;
924
925 case DNSFilterEngine::PolicyKind::Truncate:
926 if(!dc->d_tcp) {
927 ret.clear();
928 res=RCode::NoError;
929 pw.getHeader()->tc=1;
930 goto haveAnswer;
931 }
932 break;
933
934 case DNSFilterEngine::PolicyKind::Custom:
935 ret.clear();
936 res=RCode::NoError;
a9e029ee 937 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 938 ret.push_back(spoofed);
53508135 939 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
940 goto haveAnswer;
941 }
942 }
943
944 if (wantsRPZ) {
1f1ca368 945 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 946 }
db486de5 947
f26bf547 948 if(t_pdl) {
db486de5
PL
949 if(res == RCode::NoError) {
950 auto i=ret.cbegin();
951 for(; i!= ret.cend(); ++i)
952 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
953 break;
f26bf547 954 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
955 shouldNotValidate = true;
956
db486de5 957 }
f26bf547 958 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 959 shouldNotValidate = true;
db486de5 960
f26bf547 961 if(t_pdl->postresolve(dq, res))
3ca4e735 962 shouldNotValidate = true;
db486de5
PL
963 }
964
b8470add
PL
965 if (wantsRPZ) { //XXX This block is repeated, see above
966 g_stats.policyResults[appliedPolicy.d_kind]++;
967 switch(appliedPolicy.d_kind) {
968 case DNSFilterEngine::PolicyKind::NoAction:
969 break;
970 case DNSFilterEngine::PolicyKind::Drop:
971 g_stats.policyDrops++;
972 delete dc;
973 dc=0;
974 return;
975 case DNSFilterEngine::PolicyKind::NXDOMAIN:
976 ret.clear();
977 res=RCode::NXDomain;
978 goto haveAnswer;
979
980 case DNSFilterEngine::PolicyKind::NODATA:
981 ret.clear();
982 res=RCode::NoError;
983 goto haveAnswer;
984
985 case DNSFilterEngine::PolicyKind::Truncate:
986 if(!dc->d_tcp) {
987 ret.clear();
988 res=RCode::NoError;
989 pw.getHeader()->tc=1;
990 goto haveAnswer;
991 }
992 break;
993
994 case DNSFilterEngine::PolicyKind::Custom:
995 ret.clear();
996 res=RCode::NoError;
a9e029ee 997 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 998 ret.push_back(spoofed);
53508135 999 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1000 goto haveAnswer;
1001 }
644dd1da 1002 }
4485aa35 1003 }
644dd1da 1004 haveAnswer:;
3e8216c8 1005 if(res == PolicyDecision::DROP) {
e9c2ad3a 1006 g_stats.policyDrops++;
ae7e77ad 1007 delete dc;
1008 dc=0;
1009 return;
3ddb9247 1010 }
3e8216c8 1011 if(tracedQuery || res == PolicyDecision::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1012 {
85ffbc53
PD
1013 string trace(sr.getTrace());
1014 if(!trace.empty()) {
1015 vector<string> lines;
1016 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1017 for(const string& line : lines) {
85ffbc53
PD
1018 if(!line.empty())
1019 L<<Logger::Warning<< line << endl;
1020 }
1021 }
1022 }
3ddb9247 1023
b3f0ed10 1024 if(res == PolicyDecision::PASS) { // XXX what does this MEAN? Why servfail on PASS?
0fe1d080
PD
1025 pw.getHeader()->rcode=RCode::ServFail;
1026 // no commit here, because no record
1027 g_stats.servFails++;
1028 }
288f4aa9 1029 else {
ea634573 1030 pw.getHeader()->rcode=res;
92011b8f 1031
f3fe4ae6 1032 // Does the validation mode or query demand validation?
9fc36e90 1033 if(!shouldNotValidate && (g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process))) {
b25cae9a 1034 try {
f3fe4ae6 1035 if(sr.doLog()) {
5fc44cd2 1036 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
2e921ec6 1037 }
4d2be65d
RG
1038
1039 auto state = sr.getValidationState();
1040
b25cae9a 1041 if(state == Secure) {
2e921ec6 1042 if(sr.doLog()) {
5fc44cd2 1043 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
2e921ec6 1044 }
b25cae9a 1045
1046 // Is the query source interested in the value of the ad-bit?
885c8881 1047 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1048 pw.getHeader()->ad=1;
1049 }
1050 else if(state == Insecure) {
f3fe4ae6 1051 if(sr.doLog()) {
5fc44cd2 1052 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
12ce523e 1053 }
b25cae9a 1054
1055 pw.getHeader()->ad=0;
f3fe4ae6 1056 }
b25cae9a 1057 else if(state == Bogus) {
c87e1876 1058 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
5fc44cd2 1059 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
b25cae9a 1060 }
1061
1062 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1063 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1064 if(sr.doLog()) {
5fc44cd2 1065 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1066 }
1067
1068 pw.getHeader()->rcode=RCode::ServFail;
1069 goto sendit;
1070 } else {
1071 if(sr.doLog()) {
5fc44cd2 1072 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1073 }
1074 }
1075 }
1076 }
1077 catch(ImmediateServFailException &e) {
1078 if(g_logCommonErrors)
5fc44cd2 1079 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1080 pw.getHeader()->rcode=RCode::ServFail;
1081 goto sendit;
f3fe4ae6 1082 }
b3f0ed10 1083 }
1084
c154c8a4 1085 if(ret.size()) {
92476c8b 1086 orderAndShuffle(ret);
ad42489c 1087 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
3e61e7f7 1088 sort(ret.begin(), ret.end(), *sl);
1089 variableAnswer=true;
1090 }
8e079f3a 1091 }
1092 if(haveEDNS) {
1093 ret.push_back(makeOpt(edo.d_packetsize, 0, edo.d_Z));
1094 }
0afa32d4
RG
1095
1096 bool needCommit = false;
8e079f3a 1097 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1098 if( ! DNSSECOK &&
1099 ( i->d_type == QType::NSEC3 ||
1100 (
1101 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1102 (
1103 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1104 i->d_place != DNSResourceRecord::ANSWER
1105 )
1106 )
1107 )
1108 ) {
2e921ec6 1109 continue;
3e80ebce
KM
1110 }
1111
8e079f3a 1112 pw.startRecord(i->d_name, i->d_type, i->d_ttl, i->d_class, i->d_place);
1113 if(i->d_type != QType::OPT) // their TTL ain't real
1114 minTTL = min(minTTL, i->d_ttl);
1115 i->d_content->toPacket(pw);
32015748 1116 if(pw.size() > static_cast<size_t>(maxanswersize)) {
8e079f3a 1117 pw.rollback();
1118 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
add935a2 1119 {
4957a608 1120 pw.getHeader()->tc=1;
add935a2
PD
1121 pw.truncate();
1122 }
8e079f3a 1123 goto sendit; // need to jump over pw.commit
1124 }
0afa32d4 1125 needCommit = true;
aa7929a3 1126#ifdef HAVE_PROTOBUF
d9d3f9c1
RG
1127 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1128 pbMessage.addRR(*i);
aa7929a3
RG
1129 }
1130#endif
ea634573 1131 }
0afa32d4 1132 if(needCommit)
8e079f3a 1133 pw.commit();
288f4aa9 1134 }
10321a98 1135 sendit:;
b3f0ed10 1136
79332bff 1137 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
92011b8f 1138 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
aa7929a3 1139#ifdef HAVE_PROTOBUF
b790ef3d 1140 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
d9d3f9c1
RG
1141 pbMessage.setBytes(packet.size());
1142 pbMessage.setResponseCode(pw.getHeader()->rcode);
0a273054
RG
1143 if (appliedPolicy.d_name) {
1144 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1145 }
d9d3f9c1 1146 pbMessage.setPolicyTags(dc->d_policyTags);
58307a85 1147 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
67e31ebe 1148 pbMessage.setRequestorId(dq.requestorId);
02b47f43 1149 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
aa7929a3
RG
1150 }
1151#endif
ea634573 1152 if(!dc->d_tcp) {
b71b60ee 1153 struct msghdr msgh;
1154 struct iovec iov;
1155 char cbuf[256];
1156 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1157 msgh.msg_control=NULL;
1158
cbc03320 1159 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1160 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1161 }
cbc03320 1162 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1163 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
3762e821 1164 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
e9f63d47 1165 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1166 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1167 g_now.tv_sec,
76e2b9e3 1168 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1
RG
1169 min(minTTL,SyncRes::s_packetcachettl),
1170 &pbMessage);
1051f8a9 1171 }
3762e821 1172 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1173 }
9c495589
BH
1174 else {
1175 char buf[2];
ea634573
BH
1176 buf[0]=packet.size()/256;
1177 buf[1]=packet.size()%256;
feccc9fc 1178
c038218b 1179 Utility::iovec iov[2];
feccc9fc 1180
ea634573
BH
1181 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1182 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1183
dd079764 1184 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1185 bool hadError=true;
feccc9fc 1186
dd079764 1187 if(wret == 0)
18af64a8 1188 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1189 else if(wret < 0 )
18af64a8 1190 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764
RG
1191 else if((unsigned int)wret != 2 + packet.size())
1192 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1193 else
18af64a8 1194 hadError=false;
3ddb9247 1195
09e6702a 1196 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1197
09e6702a 1198 if(hadError) {
18af64a8 1199 // no need to remove us from FDM, we weren't there
c36bc97a 1200 dc->d_socket = -1;
09e6702a 1201 }
a6ae6414 1202 else {
fde296a3
RG
1203 dc->d_tcpConnection->queriesCount++;
1204 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1205 dc->d_socket = -1;
1206 }
1207 else {
1208 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1209 Utility::gettimeofday(&g_now, 0); // needs to be updated
1210 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1211 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1212 }
0e9d9ce2 1213 }
9c495589 1214 }
3ddb9247 1215
1d5b3ce6 1216 if(!g_quiet) {
461df9d2 1217 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 1218 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
9de3e034 1219 sr.d_totUsec/1000.0<<" ms, "<<
1220 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e 1221 }
b23b8614 1222
3ddb9247 1223 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
30ee601a 1224 float spent=makeFloat(sr.getNow()-dc->d_now);
fe213470
BH
1225 if(spent < 0.001)
1226 g_stats.answers0_1++;
1227 else if(spent < 0.010)
1228 g_stats.answers1_10++;
1229 else if(spent < 0.1)
1230 g_stats.answers10_100++;
1231 else if(spent < 1.0)
1232 g_stats.answers100_1000++;
1233 else
1234 g_stats.answersSlow++;
1235
574af7ea 1236 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1237 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1238 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1239 // no worries, we do this for packet cache hits elsewhere
c6d04bdc 1240 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 1241 delete dc;
c36bc97a 1242 dc=0;
288f4aa9 1243 }
3f81d239 1244 catch(PDNSException &ae) {
a903b39c 1245 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 1246 delete dc;
288f4aa9 1247 }
7b1469bb 1248 catch(MOADNSException& e) {
a903b39c 1249 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 1250 delete dc;
7b1469bb 1251 }
fdbf35ac 1252 catch(std::exception& e) {
068c7634
PD
1253 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1254
1255 // Luawrapper nests the exception from Lua, so we unnest it here
1256 try {
1257 std::rethrow_if_nested(e);
1258 } catch(const std::exception& e) {
1259 L<<". Extra info: "<<e.what();
1260 } catch(...) {}
1261
1262 L<<endl;
c36bc97a 1263 delete dc;
c154c8a4 1264 }
288f4aa9 1265 catch(...) {
a903b39c 1266 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1267 }
3ddb9247 1268
ec6eacbc 1269 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1270}
1271
d187038c 1272static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1273{
2d733c0f 1274 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1275 if(processNum >= 0)
335da0ba 1276 sockname += "."+std::to_string(processNum);
677e2a46 1277 sockname+=".controlsocket";
41f7a068 1278 s_rcc.listen(sockname);
3ddb9247 1279
387de317
BH
1280 int sockowner = -1;
1281 int sockgroup = -1;
1282
1283 if (!::arg().isEmpty("socket-group"))
1284 sockgroup=::arg().asGid("socket-group");
1285 if (!::arg().isEmpty("socket-owner"))
1286 sockowner=::arg().asUid("socket-owner");
3ddb9247 1287
f838ad8d
BH
1288 if (sockgroup > -1 || sockowner > -1) {
1289 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1290 unixDie("Failed to chown control socket");
1291 }
1292 }
387de317
BH
1293
1294 // do mode change if socket-mode is given
1295 if(!::arg().isEmpty("socket-mode")) {
1296 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1297 if(chmod(sockname.c_str(), sockmode) < 0) {
1298 unixDie("Failed to chmod control socket");
1299 }
387de317 1300 }
1d5b3ce6
BH
1301}
1302
00b8cadc 1303static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options)
02b47f43 1304{
b40562da 1305 bool found = false;
02b47f43
RG
1306 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1307 size_t questionLen = question.length();
1308 unsigned int consumed=0;
1309 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1310
1311 size_t pos= sizeof(dnsheader)+consumed+4;
1312 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1313 = 11 */
1314 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1315 /* OPT root label (1) followed by type (2) */
1316 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
00b8cadc
RG
1317 if (!options) {
1318 char* ecsStart = nullptr;
1319 size_t ecsLen = 0;
1320 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1321 if (res == 0 && ecsLen > 4) {
1322 EDNSSubnetOpts eso;
1323 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1324 *ednssubnet=eso;
1325 found = true;
1326 }
1327 }
1328 }
1329 else {
1330 int res = getEDNSOptions((char*)question.c_str()+pos+9, questionLen - pos - 9, *options);
1331 if (res == 0) {
1332 const auto& it = options->find(EDNSOptionCode::ECS);
1333 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1334 EDNSSubnetOpts eso;
1335 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1336 *ednssubnet=eso;
1337 found = true;
1338 }
1339 }
02b47f43
RG
1340 }
1341 }
1342 }
1343 }
b40562da 1344 return found;
02b47f43
RG
1345}
1346
d187038c 1347static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1348{
cd989c87 1349 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1350
879b3f70 1351 if(conn->state==TCPConnection::BYTE0) {
b841314c 1352 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 1353 if(bytes==1)
667f7e60 1354 conn->state=TCPConnection::BYTE1;
3ddb9247 1355 if(bytes==2) {
a0aa4f64 1356 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
1357 conn->bytesread=0;
1358 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1359 }
1360 if(!bytes || bytes < 0) {
bb4bdbaf 1361 t_fdm->removeReadFD(fd);
09e6702a
BH
1362 return;
1363 }
1364 }
667f7e60 1365 else if(conn->state==TCPConnection::BYTE1) {
b841314c 1366 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 1367 if(bytes==1) {
667f7e60 1368 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1369 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 1370 conn->bytesread=0;
09e6702a
BH
1371 }
1372 if(!bytes || bytes < 0) {
1373 if(g_logCommonErrors)
cd989c87 1374 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 1375 t_fdm->removeReadFD(fd);
09e6702a
BH
1376 return;
1377 }
1378 }
667f7e60 1379 else if(conn->state==TCPConnection::GETQUESTION) {
b841314c 1380 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
f9d67b41 1381 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
cd989c87 1382 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 1383 t_fdm->removeReadFD(fd);
09e6702a
BH
1384 return;
1385 }
b841314c 1386 conn->bytesread+=(uint16_t)bytes;
667f7e60 1387 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1388 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1389
f26bf547 1390 DNSComboWriter* dc=nullptr;
09e6702a 1391 try {
cd989c87 1392 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
1393 }
1394 catch(MOADNSException &mde) {
3ddb9247 1395 g_stats.clientParseError++;
4957a608 1396 if(g_logCommonErrors)
cd989c87 1397 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 1398 return;
09e6702a 1399 }
cd989c87
BH
1400 dc->d_tcpConnection = conn; // carry the torch
1401 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1402 dc->d_tcp=true;
cd989c87 1403 dc->setRemote(&conn->d_remote);
a6147cd2 1404 ComboAddress dest;
1405 memset(&dest, 0, sizeof(dest));
1406 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1407 socklen_t len = dest.getSocklen();
1408 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1409 dc->setLocal(dest);
33dcceba
RG
1410 DNSName qname;
1411 uint16_t qtype=0;
1412 uint16_t qclass=0;
1413 bool needECS = false;
67e31ebe 1414 string requestorId;
aa7929a3 1415#ifdef HAVE_PROTOBUF
02b47f43 1416 auto luaconfsLocal = g_luaconfs.getLocal();
33dcceba
RG
1417 if (luaconfsLocal->protobufServer) {
1418 needECS = true;
1419 }
1420#endif
1421
f26bf547 1422 if(needECS || (t_pdl && t_pdl->d_gettag)) {
33dcceba
RG
1423
1424 try {
00b8cadc 1425 std::map<uint16_t, EDNSOptionView> ednsOptions;
b40562da 1426 dc->d_ecsParsed = true;
00b8cadc 1427 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
02b47f43 1428
f26bf547 1429 if(t_pdl && t_pdl->d_gettag) {
33dcceba 1430 try {
a712cb56 1431 dc->d_tag = t_pdl->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId);
33dcceba
RG
1432 }
1433 catch(std::exception& e) {
1434 if(g_logCommonErrors)
1435 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1436 }
1437 }
1438 }
1439 catch(std::exception& e)
1440 {
1441 if(g_logCommonErrors)
1442 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1443 }
1444 }
1445#ifdef HAVE_PROTOBUF
4898a348 1446 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
67e31ebe 1447 dc->d_requestorId = requestorId;
02b47f43 1448 dc->d_uuid = (*t_uuidGenerator)();
4898a348 1449 }
02b47f43 1450
4898a348 1451 if(luaconfsLocal->protobufServer) {
02b47f43 1452 try {
02b47f43 1453 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
02b47f43 1454
b790ef3d 1455 if (!luaconfsLocal->protobufTaggedOnly) {
67e31ebe 1456 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId);
b790ef3d 1457 }
02b47f43
RG
1458 }
1459 catch(std::exception& e) {
1460 if(g_logCommonErrors)
1461 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1462 }
1463 }
aa7929a3 1464#endif
879b3f70 1465 if(dc->d_mdp.d_header.qr) {
4957a608 1466 delete dc;
048f5db6 1467 g_stats.ignoredCount++;
4328f463 1468 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
4957a608 1469 return;
879b3f70 1470 }
3abcdab2
PD
1471 if(dc->d_mdp.d_header.opcode) {
1472 delete dc;
048f5db6 1473 g_stats.ignoredCount++;
4328f463 1474 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
3abcdab2
PD
1475 return;
1476 }
09e6702a 1477 else {
4957a608
BH
1478 ++g_stats.qcounter;
1479 ++g_stats.tcpqcounter;
50a5ef72 1480 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1481 return;
09e6702a
BH
1482 }
1483 }
1484 }
1485}
1486
6dcd28c3 1487//! Handle new incoming TCP connection
d187038c 1488static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1489{
37d3f960 1490 ComboAddress addr;
09e6702a 1491 socklen_t addrlen=sizeof(addr);
a683e8bd 1492 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1493 if(newsock>=0) {
85c32340
BH
1494 if(MT->numProcesses() > g_maxMThreads) {
1495 g_stats.overCapacityDrops++;
a7b68ae7
RG
1496 try {
1497 closesocket(newsock);
1498 }
1499 catch(const PDNSException& e) {
1500 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1501 }
85c32340
BH
1502 return;
1503 }
1504
92011b8f 1505 if(t_remotes)
1506 t_remotes->push_back(addr);
49a699c4 1507 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1508 if(!g_quiet)
4957a608 1509 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1510
09e6702a 1511 g_stats.unauthorizedTCP++;
a7b68ae7
RG
1512 try {
1513 closesocket(newsock);
1514 }
1515 catch(const PDNSException& e) {
1516 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1517 }
09e6702a
BH
1518 return;
1519 }
bd0289fc 1520 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1521 g_stats.tcpClientOverflow++;
a7b68ae7
RG
1522 try {
1523 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1524 }
1525 catch(const PDNSException& e) {
1526 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1527 }
09e6702a
BH
1528 return;
1529 }
3ddb9247 1530
3897b9e1 1531 setNonBlocking(newsock);
f26bf547 1532 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 1533 tc->state=TCPConnection::BYTE0;
3ddb9247 1534
cd989c87 1535 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1536
0bff046b 1537 struct timeval now;
c038218b 1538 Utility::gettimeofday(&now, 0);
cd989c87 1539 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1540 }
1541}
3ddb9247 1542
d187038c 1543static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1544{
183eb877 1545 gettimeofday(&g_now, 0);
b71b60ee 1546 struct timeval diff = g_now - tv;
1547 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1548
22cf1fda 1549 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1550 g_stats.tooOldDrops++;
1551 return 0;
1552 }
1553
1bc3c142 1554 ++g_stats.qcounter;
d7f10541
BH
1555 if(fromaddr.sin4.sin_family==AF_INET6)
1556 g_stats.ipv6qcounter++;
1bc3c142
BH
1557
1558 string response;
93f0da94 1559 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 1560 unsigned int ctag=0;
f57486f1 1561 uint32_t qhash = 0;
12aff2e5 1562 bool needECS = false;
02b47f43 1563 std::vector<std::string> policyTags;
5fd2577f 1564 LuaContext::LuaObject data;
67e31ebe 1565 string requestorId;
12aff2e5 1566#ifdef HAVE_PROTOBUF
02b47f43 1567 boost::uuids::uuid uniqueId;
02b47f43
RG
1568 auto luaconfsLocal = g_luaconfs.getLocal();
1569 if (luaconfsLocal->protobufServer) {
4898a348 1570 uniqueId = (*t_uuidGenerator)();
02b47f43 1571 needECS = true;
4898a348 1572 } else if (luaconfsLocal->outgoingProtobufServer) {
02b47f43
RG
1573 uniqueId = (*t_uuidGenerator)();
1574 }
12aff2e5 1575#endif
b40562da
RG
1576 EDNSSubnetOpts ednssubnet;
1577 bool ecsFound = false;
1578 bool ecsParsed = false;
1bc3c142 1579 try {
02b47f43
RG
1580 DNSName qname;
1581 uint16_t qtype=0;
1582 uint16_t qclass=0;
1bc3c142 1583 uint32_t age;
c15ff3df 1584 bool qnameParsed=false;
8f7473d7 1585#ifdef MALLOC_TRACE
1586 /*
1587 static uint64_t last=0;
1588 if(!last)
1589 g_mtracer->clearAllocators();
1590 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1591 last=g_mtracer->getAllocs();
1592 cout<<g_mtracer->topAllocatorsString()<<endl;
1593 g_mtracer->clearAllocators();
1594 */
1595#endif
55a1378f 1596
f26bf547 1597 if(needECS || (t_pdl && t_pdl->d_gettag)) {
b2eacd67 1598 try {
00b8cadc
RG
1599 std::map<uint16_t, EDNSOptionView> ednsOptions;
1600 ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
c15ff3df
RG
1601 qnameParsed = true;
1602 ecsParsed = true;
12aff2e5 1603
f26bf547 1604 if(t_pdl && t_pdl->d_gettag) {
12aff2e5 1605 try {
f26bf547 1606 ctag=t_pdl->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId);
12aff2e5
RG
1607 }
1608 catch(std::exception& e) {
1609 if(g_logCommonErrors)
1610 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1611 }
8ea8c302 1612 }
b2eacd67 1613 }
1614 catch(std::exception& e)
1615 {
1616 if(g_logCommonErrors)
1617 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 1618 }
12ce523e 1619 }
3ddb9247 1620
02b47f43 1621 bool cacheHit = false;
d9d3f9c1 1622 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
02b47f43
RG
1623#ifdef HAVE_PROTOBUF
1624 if(luaconfsLocal->protobufServer) {
b790ef3d 1625 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
67e31ebe 1626 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId);
b790ef3d 1627 }
d9d3f9c1
RG
1628 }
1629#endif /* HAVE_PROTOBUF */
02b47f43 1630
c15ff3df
RG
1631 if (qnameParsed) {
1632 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1633 }
1634 else {
1635 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1636 }
1637
d9d3f9c1
RG
1638 if (cacheHit) {
1639#ifdef HAVE_PROTOBUF
b790ef3d 1640 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
e1c8a4bb
RG
1641 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1642 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1643 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
b40562da 1644 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
58307a85 1645 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
67e31ebe 1646 pbMessage.setRequestorId(requestorId);
02b47f43
RG
1647 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1648 }
d9d3f9c1 1649#endif /* HAVE_PROTOBUF */
49a3500d 1650 if(!g_quiet)
1651 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
8f7473d7 1652
1bc3c142
BH
1653 g_stats.packetCacheHits++;
1654 SyncRes::s_queries++;
1655 ageDNSPacket(response, age);
b71b60ee 1656 struct msghdr msgh;
1657 struct iovec iov;
1658 char cbuf[256];
1659 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
1660 msgh.msg_control=NULL;
1661
cbc03320 1662 if(g_fromtosockets.count(fd)) {
fbe2a2e0 1663 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 1664 }
cbc03320 1665 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1666 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 1667
97bee66d 1668 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
1669 struct dnsheader tmpdh;
1670 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1671 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
97bee66d 1672 }
08f3f638 1673 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1bc3c142
BH
1674 return 0;
1675 }
3ddb9247 1676 }
1bc3c142
BH
1677 catch(std::exception& e) {
1678 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1679 return 0;
1680 }
3ddb9247 1681
f26bf547
RG
1682 if(t_pdl) {
1683 if(t_pdl->ipfilter(fromaddr, destaddr, *dh)) {
4ea94941 1684 if(!g_quiet)
1685 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1686 g_stats.policyDrops++;
1687 return 0;
1688 }
1689 }
1690
1bc3c142 1691 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 1692 if(!g_quiet)
854d44e3 1693 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
461df9d2 1694
1bc3c142
BH
1695 g_stats.overCapacityDrops++;
1696 return 0;
1697 }
3ddb9247 1698
1bc3c142
BH
1699 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1700 dc->setSocket(fd);
49a3500d 1701 dc->d_tag=ctag;
e9f63d47 1702 dc->d_qhash=qhash;
49a3500d 1703 dc->d_query = question;
1bc3c142 1704 dc->setRemote(&fromaddr);
b71b60ee 1705 dc->setLocal(destaddr);
1bc3c142 1706 dc->d_tcp=false;
02b47f43 1707 dc->d_policyTags = policyTags;
05c74122 1708 dc->d_data = data;
b40562da
RG
1709 dc->d_ecsFound = ecsFound;
1710 dc->d_ecsParsed = ecsParsed;
1711 dc->d_ednssubnet = ednssubnet;
aa7929a3 1712#ifdef HAVE_PROTOBUF
4898a348 1713 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
d9d3f9c1
RG
1714 dc->d_uuid = uniqueId;
1715 }
67e31ebe 1716 dc->d_requestorId = requestorId;
aa7929a3
RG
1717#endif
1718
1bc3c142
BH
1719 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1720 return 0;
3ddb9247
PD
1721}
1722
b71b60ee 1723
d187038c 1724static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 1725{
a683e8bd 1726 ssize_t len;
5db529f8
BH
1727 char data[1500];
1728 ComboAddress fromaddr;
b71b60ee 1729 struct msghdr msgh;
1730 struct iovec iov;
1731 char cbuf[256];
390f1dab 1732 bool firstQuery = true;
b71b60ee 1733
1734 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1735 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1736
3ddb9247 1737 for(;;)
b71b60ee 1738 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab
RG
1739
1740 firstQuery = false;
1741
92011b8f 1742 if(t_remotes)
1743 t_remotes->push_back(fromaddr);
b23b8614 1744
49a699c4 1745 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 1746 if(!g_quiet)
4957a608 1747 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1748
5db529f8 1749 g_stats.unauthorizedUDP++;
a9af3782 1750 return;
5db529f8 1751 }
15c01deb 1752 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 1753 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 1754 if(!g_quiet)
81859ba5 1755 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1756
1757 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1758 return;
1759 }
5db529f8 1760 try {
b23b8614 1761 dnsheader* dh=(dnsheader*)data;
3ddb9247 1762
b23b8614 1763 if(dh->qr) {
048f5db6 1764 g_stats.ignoredCount++;
4957a608
BH
1765 if(g_logCommonErrors)
1766 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 1767 }
3abcdab2 1768 else if(dh->opcode) {
048f5db6 1769 g_stats.ignoredCount++;
3abcdab2
PD
1770 if(g_logCommonErrors)
1771 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1772 }
5db529f8 1773 else {
a683e8bd 1774 string question(data, (size_t)len);
b71b60ee 1775 struct timeval tv={0,0};
1776 HarvestTimestamp(&msgh, &tv);
1777 ComboAddress dest;
c3cecd36 1778 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
a6147cd2 1779 auto loc = rplookup(g_listenSocketsAddresses, fd);
1780 if(HarvestDestinationAddress(&msgh, &dest)) {
1781 // but.. need to get port too
1782 if(loc)
1783 dest.sin4.sin_port = loc->sin4.sin_port;
1784 }
1785 else {
1786 if(loc) {
1787 dest = *loc;
1788 }
1789 else {
1790 dest.sin4.sin_family = fromaddr.sin4.sin_family;
a683e8bd
RG
1791 socklen_t slen = dest.getSocklen();
1792 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
a6147cd2 1793 }
1794 }
232f0877 1795 if(g_weDistributeQueries)
b71b60ee 1796 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
232f0877 1797 else
b71b60ee 1798 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
5db529f8
BH
1799 }
1800 }
1801 catch(MOADNSException& mde) {
3ddb9247 1802 g_stats.clientParseError++;
84e66a59 1803 if(g_logCommonErrors)
4957a608 1804 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 1805 }
0b602819
KM
1806 catch(std::runtime_error& e) {
1807 g_stats.clientParseError++;
1808 if(g_logCommonErrors)
1809 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
1810 }
1811 }
ac0e821b
BH
1812 else {
1813 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
390f1dab 1814 if(firstQuery && errno == EAGAIN)
9326cae1 1815 g_stats.noPacketError++;
390f1dab 1816
bf3b0cec 1817 break;
ac0e821b 1818 }
5db529f8
BH
1819}
1820
810ff705 1821static void makeTCPServerSockets(unsigned int threadId)
9c495589 1822{
37d3f960 1823 int fd;
f28307ad 1824 vector<string>locals;
2e3d8a19 1825 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1826
f28307ad 1827 if(locals.empty())
3f81d239 1828 throw PDNSException("No local address specified");
3ddb9247 1829
f28307ad 1830 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1831 ServiceTuple st;
1832 st.port=::arg().asNum("local-port");
1833 parseService(*i, st);
3ddb9247 1834
32252594
BH
1835 ComboAddress sin;
1836
f28307ad 1837 memset((char *)&sin,0, sizeof(sin));
37d3f960 1838 sin.sin4.sin_family = AF_INET;
32252594 1839 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1840 sin.sin6.sin6_family = AF_INET6;
f71bc087 1841 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1842 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1843 }
1844
1845 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 1846 if(fd<0)
3f81d239 1847 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 1848
3897b9e1 1849 setCloseOnExec(fd);
a903b39c 1850
f28307ad 1851 int tmp=1;
810ff705 1852 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
f28307ad 1853 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1854 exit(1);
f28307ad 1855 }
0dfa94ab 1856 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1857 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1858 }
1859
c8ddb7c2 1860#ifdef TCP_DEFER_ACCEPT
810ff705 1861 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 1862 if(i==locals.begin())
4957a608 1863 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1864 }
1865#endif
1866
fec7dd5a
SS
1867 if( ::arg().mustDo("non-local-bind") )
1868 Utility::setBindAny(AF_INET, fd);
1869
2332f42d 1870#ifdef SO_REUSEPORT
810ff705
RG
1871 if(g_reusePort) {
1872 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 1873 throw PDNSException("SO_REUSEPORT: "+stringerror());
1874 }
1875#endif
1876
0735b17e
RG
1877 if (::arg().asNum("tcp-fast-open") > 0) {
1878#ifdef TCP_FASTOPEN
1879 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1880 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1881 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1882 }
1883#else
1884 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1885#endif
1886 }
1887
32252594 1888 sin.sin4.sin_port = htons(st.port);
a683e8bd 1889 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1890 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1891 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 1892
3897b9e1 1893 setNonBlocking(fd);
49a699c4 1894 setSocketSendBuffer(fd, 65000);
37d3f960 1895 listen(fd, 128);
810ff705 1896 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1897 g_tcpListenSockets.push_back(fd);
84433b79 1898 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1899 // - fd is not that which we know here, but returned from accept()
3ddb9247 1900 if(sin.sin4.sin_family == AF_INET)
32252594 1901 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1902 else
32252594 1903 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1904 }
9c495589
BH
1905}
1906
810ff705 1907static void makeUDPServerSockets(unsigned int threadId)
288f4aa9 1908{
fec7dd5a 1909 int one=1;
f28307ad 1910 vector<string>locals;
2e3d8a19 1911 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1912
f28307ad 1913 if(locals.empty())
3f81d239 1914 throw PDNSException("No local address specified");
3ddb9247 1915
f28307ad 1916 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1917 ServiceTuple st;
1918 st.port=::arg().asNum("local-port");
1919 parseService(*i, st);
1920
37d3f960 1921 ComboAddress sin;
996c89cc 1922
37d3f960
BH
1923 memset(&sin, 0, sizeof(sin));
1924 sin.sin4.sin_family = AF_INET;
32252594 1925 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1926 sin.sin6.sin6_family = AF_INET6;
f71bc087 1927 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1928 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 1929 }
3ddb9247 1930
bb4bdbaf 1931 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 1932 if(fd < 0) {
3f81d239 1933 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1934 }
915b0c39
AT
1935 if (!setSocketTimestamps(fd))
1936 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 1937
b71b60ee 1938 if(IsAnyAddress(sin)) {
cbc03320 1939 if(sin.sin4.sin_family == AF_INET)
1940 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
1941 g_fromtosockets.insert(fd);
757d3179 1942#ifdef IPV6_RECVPKTINFO
cbc03320 1943 if(sin.sin4.sin_family == AF_INET6)
1944 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
1945 g_fromtosockets.insert(fd);
757d3179 1946#endif
0dfa94ab 1947 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
1948 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1949 }
b71b60ee 1950 }
fec7dd5a
SS
1951 if( ::arg().mustDo("non-local-bind") )
1952 Utility::setBindAny(AF_INET6, fd);
1953
3897b9e1 1954 setCloseOnExec(fd);
a903b39c 1955
4e9a20e6 1956 setSocketReceiveBuffer(fd, 250000);
32252594 1957 sin.sin4.sin_port = htons(st.port);
37d3f960 1958
2332f42d 1959
2573d4a6 1960#ifdef SO_REUSEPORT
810ff705 1961 if(g_reusePort) {
2332f42d 1962 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
1963 throw PDNSException("SO_REUSEPORT: "+stringerror());
1964 }
1965#endif
a683e8bd 1966 socklen_t socklen=sin.getSocklen();
3ddb9247 1967 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 1968 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 1969
3897b9e1 1970 setNonBlocking(fd);
c2136bf0 1971
810ff705 1972 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 1973 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 1974 if(sin.sin4.sin_family == AF_INET)
32252594 1975 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1976 else
32252594 1977 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1978 }
c836dc19 1979}
caa6eefa 1980
d187038c 1981static void daemonize(void)
c836dc19
BH
1982{
1983 if(fork())
1984 exit(0); // bye bye
3ddb9247
PD
1985
1986 setsid();
c836dc19 1987
27a5ead5 1988 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 1989 if(i < 0)
27a5ead5
BH
1990 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1991 else {
1992 dup2(i,0); /* stdin */
1993 dup2(i,1); /* stderr */
1994 dup2(i,2); /* stderr */
1995 close(i);
1996 }
288f4aa9 1997}
caa6eefa 1998
d187038c 1999static void usr1Handler(int)
c75a6a9e
BH
2000{
2001 statsWanted=true;
2002}
ae1b2e98 2003
d187038c 2004static void usr2Handler(int)
9170fbaf 2005{
f1f34cc2 2006 g_quiet= !g_quiet;
2007 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2008 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2009}
2010
d187038c 2011static void doStats(void)
c75a6a9e 2012{
16beeaa4
BH
2013 static time_t lastOutputTime;
2014 static uint64_t lastQueryCount;
d299d4f5 2015
2016 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2017 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2018
d299d4f5 2019 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
bd301954 2020 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2021 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2022 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2023 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2024
bd301954 2025 L<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2026 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2027 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
bd301954
JB
2028 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2029 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2030 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
bd301954 2031 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2032 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2033
bd301954 2034 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2035 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2036
bd301954 2037 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2038 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2039
16beeaa4
BH
2040 time_t now = time(0);
2041 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
bd301954 2042 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2043 }
2044 lastOutputTime = now;
2045 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2046 }
3ddb9247 2047 else if(statsWanted)
bd301954 2048 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2049
c75a6a9e
BH
2050 statsWanted=false;
2051}
c836dc19 2052
29f0b1ce 2053static void houseKeeping(void *)
c836dc19 2054{
3337c2f7
RG
2055 static thread_local time_t last_stat, last_rootupdate, last_prune, last_secpoll;
2056 static thread_local int cleanCounter=0;
2057 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
cc59bce6 2058 try {
2059 if(s_running)
2060 return;
2061 s_running=true;
3ddb9247 2062
cc59bce6 2063 struct timeval now;
2064 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2065
2066 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 2067 DTime dt;
2068 dt.setTimeval(now);
2069 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
f8f243b0 2070 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numWorkerThreads);
3ddb9247 2071
a712cb56 2072 SyncRes::pruneNegCache(::arg().asNum("max-cache-entries") / (g_numWorkerThreads * 10));
3ddb9247 2073
cc59bce6 2074 if(!((cleanCounter++)%40)) { // this is a full scan!
2075 time_t limit=now.tv_sec-300;
a712cb56 2076 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2077 }
2078 last_prune=time(0);
d67620e4 2079 }
3ddb9247 2080
cc59bce6 2081 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2082 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2083 if (!res)
2084 last_rootupdate=now.tv_sec;
cc59bce6 2085 }
3ddb9247 2086
cc59bce6 2087 if(!t_id) {
0ec489bf 2088 if(g_statisticsInterval > 0 && now.tv_sec - last_stat >= g_statisticsInterval) {
cc59bce6 2089 doStats();
2090 last_stat=time(0);
2091 }
3ddb9247 2092
cc59bce6 2093 if(now.tv_sec - last_secpoll >= 3600) {
2094 try {
2095 doSecPoll(&last_secpoll);
2096 }
2097 catch(...) {}
18b73338 2098 }
d67620e4 2099 }
cc59bce6 2100 s_running=false;
d67620e4 2101 }
cc59bce6 2102 catch(PDNSException& ae)
2103 {
2104 s_running=false;
2105 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2106 throw;
2107 }
779828c4 2108}
d6d5dea7 2109
d187038c 2110static void makeThreadPipes()
49a699c4 2111{
c3828c03 2112 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
2113 struct ThreadPipeSet tps;
2114 int fd[2];
2115 if(pipe(fd) < 0)
2116 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2117
49a699c4
BH
2118 tps.readToThread = fd[0];
2119 tps.writeToThread = fd[1];
3ddb9247 2120
49a699c4
BH
2121 if(pipe(fd) < 0)
2122 unixDie("Creating pipe for inter-thread communications");
2123 tps.readFromThread = fd[0];
2124 tps.writeFromThread = fd[1];
3ddb9247 2125
49a699c4
BH
2126 g_pipes.push_back(tps);
2127 }
2128}
2129
00c9b8c1
BH
2130struct ThreadMSG
2131{
2132 pipefunc_t func;
2133 bool wantAnswer;
2134};
2135
49a699c4
BH
2136void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2137{
49a699c4 2138 unsigned int n = 0;
1dc8f4d0 2139 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
2140 {
2141 if(n++ == t_id) {
2142 if(!skipSelf)
2143 func(); // don't write to ourselves!
2144 continue;
2145 }
3ddb9247 2146
00c9b8c1
BH
2147 ThreadMSG* tmsg = new ThreadMSG();
2148 tmsg->func = func;
2149 tmsg->wantAnswer = true;
b841314c
RG
2150 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2151 delete tmsg;
49a699c4 2152 unixDie("write to thread pipe returned wrong size or error");
b841314c 2153 }
3ddb9247 2154
49a699c4
BH
2155 string* resp;
2156 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2157 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2158
49a699c4
BH
2159 if(resp) {
2160// cerr <<"got response: " << *resp << endl;
2161 delete resp;
2162 }
2163 }
2164}
06ea9015 2165
8171ab83 2166void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 2167{
8171ab83 2168 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 2169 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2170
00c9b8c1
BH
2171 if(target == t_id) {
2172 func();
2173 return;
2174 }
3ddb9247 2175 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
2176 ThreadMSG* tmsg = new ThreadMSG();
2177 tmsg->func = func;
2178 tmsg->wantAnswer = false;
3ddb9247 2179
b841314c
RG
2180 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2181 delete tmsg;
3ddb9247 2182 unixDie("write to thread pipe returned wrong size or error");
b841314c 2183 }
00c9b8c1 2184}
3427fa8a 2185
d187038c 2186static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2187{
f26bf547 2188 ThreadMSG* tmsg = nullptr;
3ddb9247
PD
2189
2190 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
2191 unixDie("read from thread pipe returned wrong size or error");
2192 }
3ddb9247 2193
2f22827a 2194 void *resp=0;
2195 try {
2196 resp = tmsg->func();
2197 }
2198 catch(std::exception& e) {
6d2010a8 2199 if(g_logCommonErrors)
2200 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2201 }
2202 catch(PDNSException& e) {
6d2010a8 2203 if(g_logCommonErrors)
2204 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2205 }
d7c676a5
RG
2206 if(tmsg->wantAnswer) {
2207 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2208 delete tmsg;
00c9b8c1 2209 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
2210 }
2211 }
3ddb9247 2212
00c9b8c1 2213 delete tmsg;
49a699c4 2214}
09e6702a 2215
13034931
BH
2216template<class T> void *voider(const boost::function<T*()>& func)
2217{
2218 return func();
2219}
2220
b3b5459d
BH
2221vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2222{
2223 a.insert(a.end(), b.begin(), b.end());
2224 return a;
2225}
2226
/* Appends every (string, number) pair in `b` to the end of `a` and returns `a`. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >& a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2232
3ddb9247
PD
2233vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2234{
2235 a.insert(a.end(), b.begin(), b.end());
2236 return a;
2237}
2238
92011b8f 2239
13034931 2240template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
2241{
2242 unsigned int n = 0;
2243 T ret=T();
1dc8f4d0 2244 for(ThreadPipeSet& tps : g_pipes)
3427fa8a
BH
2245 {
2246 if(n++ == t_id) {
2247 if(!skipSelf) {
2248 T* resp = (T*)func(); // don't write to ourselves!
2249 if(resp) {
2250 //~ cerr <<"got direct: " << *resp << endl;
2251 ret += *resp;
2252 delete resp;
2253 }
2254 }
2255 continue;
2256 }
3ddb9247 2257
00c9b8c1
BH
2258 ThreadMSG* tmsg = new ThreadMSG();
2259 tmsg->func = boost::bind(voider<T>, func);
2260 tmsg->wantAnswer = true;
3ddb9247 2261
b841314c
RG
2262 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2263 delete tmsg;
3427fa8a 2264 unixDie("write to thread pipe returned wrong size or error");
b841314c 2265 }
3ddb9247 2266
3427fa8a
BH
2267 T* resp;
2268 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2269 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2270
3427fa8a
BH
2271 if(resp) {
2272 //~ cerr <<"got response: " << *resp << endl;
2273 ret += *resp;
2274 delete resp;
2275 }
2276 }
2277 return ret;
2278}
2279
13034931
BH
2280template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2281template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
b3b5459d 2282template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
3ddb9247 2283template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 2284
d187038c 2285static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2286{
2287 string remote;
2288 string msg=s_rcc.recv(&remote);
2289 RecursorControlParser rcp;
2290 RecursorControlParser::func_t* command;
3ddb9247 2291
09e6702a 2292 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2293
2294 // If we are inside a chroot, we need to strip
2295 if (!arg()["chroot"].empty()) {
a683e8bd 2296 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2297 remote = remote.substr(len);
2298 }
2299
ab5c053d
BH
2300 try {
2301 s_rcc.send(answer, &remote);
2302 command();
2303 }
fdbf35ac 2304 catch(std::exception& e) {
ab5c053d
BH
2305 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2306 }
3f81d239 2307 catch(PDNSException& ae) {
ab5c053d
BH
2308 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2309 }
09e6702a
BH
2310}
2311
d187038c 2312static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2313{
0b18b22e 2314 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2315 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2316
667f7e60 2317 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2318
a683e8bd 2319 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2320 if(ret > 0) {
667f7e60 2321 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2322 pident->inNeeded-=(size_t)ret;
825fa717 2323 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2324 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2325 PacketID pid=*pident;
2326 string msg=pident->inMSG;
3ddb9247 2327
bb4bdbaf 2328 t_fdm->removeReadFD(fd);
3ddb9247 2329 MT->sendEvent(pid, &msg);
09e6702a
BH
2330 }
2331 else {
667f7e60 2332 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2333 }
2334 }
2335 else {
667f7e60 2336 PacketID tmp=*pident;
bb4bdbaf 2337 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2338 string empty;
2339 MT->sendEvent(tmp, &empty); // this conveys error status
2340 }
2341}
2342
d187038c 2343static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2344{
0b18b22e 2345 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 2346 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 2347 if(ret > 0) {
a683e8bd 2348 pid->outPos+=(ssize_t)ret;
667f7e60
BH
2349 if(pid->outPos==pid->outMSG.size()) {
2350 PacketID tmp=*pid;
bb4bdbaf 2351 t_fdm->removeWriteFD(fd);
09e6702a
BH
2352 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2353 }
2354 }
2355 else { // error or EOF
667f7e60 2356 PacketID tmp(*pid);
bb4bdbaf 2357 t_fdm->removeWriteFD(fd);
09e6702a 2358 string sent;
998a4334 2359 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
2360 }
2361}
2362
34801ab1 2363// resend event to everybody chained onto it
d187038c 2364static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
2365{
2366 if(iter->key.chain.empty())
2367 return;
e27e91a8 2368 // cerr<<"doResends called!\n";
34801ab1
BH
2369 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2370 resend.fd=-1;
2371 resend.id=*i;
e27e91a8 2372 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 2373
34801ab1
BH
2374 MT->sendEvent(resend, &content);
2375 g_stats.chainResends++;
34801ab1
BH
2376 }
2377}
2378
d187038c 2379static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2380{
600fc20b 2381 PacketID pid=any_cast<PacketID>(var);
a683e8bd 2382 ssize_t len;
e45beeda 2383 char data[g_outgoingEDNSBufsize];
996c89cc 2384 ComboAddress fromaddr;
09e6702a
BH
2385 socklen_t addrlen=sizeof(fromaddr);
2386
998a4334 2387 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 2388
a683e8bd 2389 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 2390 if(len < 0)
996c89cc 2391 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 2392 else {
3ddb9247 2393 g_stats.serverParseError++;
09e6702a 2394 if(g_logCommonErrors)
85db02c5 2395 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 2396 ": packet smaller than DNS header"<<endl;
998a4334 2397 }
34801ab1 2398
49a699c4 2399 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
2400 string empty;
2401
2402 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 2403 if(iter != MT->d_waiters.end())
34801ab1 2404 doResends(iter, pid, empty);
3ddb9247 2405
34801ab1 2406 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 2407 return;
3ddb9247 2408 }
998a4334
BH
2409
2410 dnsheader dh;
2411 memcpy(&dh, data, sizeof(dh));
3ddb9247 2412
6da3b3ad
PD
2413 PacketID pident;
2414 pident.remote=fromaddr;
2415 pident.id=dh.id;
2416 pident.fd=fd;
34801ab1 2417
33a928af 2418 if(!dh.qr && g_logCommonErrors) {
854d44e3 2419 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
2420 }
2421
2422 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2423 !dh.qr) { // one weird server
2424 pident.domain.clear();
2425 pident.type = 0;
2426 }
2427 else {
2428 try {
0b31e67e 2429 if(len > 12)
2430 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
2431 }
2432 catch(std::exception& e) {
2433 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
0b31e67e 2434 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 2435 return;
34801ab1 2436 }
6da3b3ad
PD
2437 }
2438 string packet;
2439 packet.assign(data, len);
34801ab1 2440
6da3b3ad
PD
2441 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2442 if(iter != MT->d_waiters.end()) {
2443 doResends(iter, pident, packet);
2444 }
c1da7976 2445
6da3b3ad 2446retryWithName:
4957a608 2447
6da3b3ad
PD
2448 if(!MT->sendEvent(pident, &packet)) {
2449 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2450 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2451 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 2452 pident.domain == mthread->key.domain) {
6da3b3ad 2453 mthread->key.nearMisses++;
998a4334 2454 }
6da3b3ad
PD
2455
2456 // be a bit paranoid here since we're weakening our matching
3ddb9247 2457 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
2458 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2459 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2460 pident.domain = mthread->key.domain;
2461 pident.type = mthread->key.type;
2462 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 2463 }
09e6702a 2464 }
6da3b3ad
PD
2465 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2466 if(g_logCommonErrors) {
8a464ee3 2467 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 2468 }
09e6702a 2469 }
6da3b3ad
PD
2470 else if(fd >= 0) {
2471 t_udpclientsocks->returnSocket(fd);
2472 }
09e6702a
BH
2473}
2474
1f4abb20
BH
2475FDMultiplexer* getMultiplexer()
2476{
2477 FDMultiplexer* ret;
f26bf547 2478 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 2479 try {
f26bf547 2480 ret=i.second();
1f4abb20
BH
2481 return ret;
2482 }
98d0ee4a 2483 catch(FDMultiplexerException &fe) {
0a7f24cb 2484 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
2485 }
2486 catch(...) {
2487 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2488 }
1f4abb20
BH
2489 }
2490 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2491 exit(1);
2492}
2493
3ddb9247 2494
d187038c 2495static string* doReloadLuaScript()
4485aa35 2496{
674cf0f6 2497 string fname= ::arg()["lua-dns-script"];
4485aa35 2498 try {
674cf0f6 2499 if(fname.empty()) {
f26bf547 2500 t_pdl.reset();
674cf0f6 2501 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 2502 return new string("unloaded\n");
4485aa35
BH
2503 }
2504 else {
f26bf547 2505 t_pdl = std::make_shared<RecursorLua4>(fname);
4485aa35
BH
2506 }
2507 }
fdbf35ac 2508 catch(std::exception& e) {
674cf0f6 2509 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 2510 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 2511 }
3ddb9247 2512
674cf0f6 2513 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 2514 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
2515}
2516
49a699c4
BH
2517string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2518{
3ddb9247 2519 if(begin != end)
49a699c4 2520 ::arg().set("lua-dns-script") = *begin;
3ddb9247 2521
0f39c1a3 2522 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 2523}
49a699c4 2524
d187038c 2525static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
2526try
2527{
2528 if(newRegex.empty()) {
f26bf547 2529 t_traceRegex.reset();
77499b05
BH
2530 return new string("unset\n");
2531 }
2532 else {
f26bf547 2533 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
2534 return new string("ok\n");
2535 }
2536}
3f81d239 2537catch(PDNSException& ae)
77499b05
BH
2538{
2539 return new string(ae.reason+"\n");
2540}
2541
2542string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2543{
2544 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2545}
2546
4e9a20e6 2547static void checkLinuxIPv6Limits()
2548{
2549#ifdef __linux__
2550 string line;
2551 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 2552 int lim=std::stoi(line);
4e9a20e6 2553 if(lim < 16384) {
36849ff2 2554 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 2555 }
2556 }
2557#endif
2558}
36849ff2 2559static void checkOrFixFDS()
4e9a20e6 2560{
c0063e60 2561 unsigned int availFDs=getFilenumLimit();
2562 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2563
2564 if(wantFDs > availFDs) {
067ad20e 2565 unsigned int hardlimit= getFilenumLimit(true);
2566 if(hardlimit >= wantFDs) {
c0063e60 2567 setFilenumLimit(wantFDs);
2568 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 2569 }
2570 else {
067ad20e 2571 int newval = (hardlimit - 25) / g_numWorkerThreads;
2572 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 2573 g_maxMThreads = newval;
067ad20e 2574 setFilenumLimit(hardlimit);
36849ff2 2575 }
2576 }
4e9a20e6 2577}
77499b05 2578
d187038c 2579static void* recursorThread(void*);
51e2144e 2580
f26bf547 2581static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
2582{
2583 t_allowFrom = ng;
f26bf547 2584 return nullptr;
49a699c4
BH
2585}
2586
dbd23fc2
BH
2587int g_argc;
2588char** g_argv;
2589
18af64a8 2590void parseACLs()
f7c1d4e3 2591{
18af64a8 2592 static bool l_initialized;
3ddb9247 2593
49a699c4 2594 if(l_initialized) { // only reload configuration file on second call
18af64a8
BH
2595 string configname=::arg()["config-dir"]+"/recursor.conf";
2596 cleanSlashes(configname);
3ddb9247
PD
2597
2598 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 2599 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 2600 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 2601 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
2602 ::arg().preParse(g_argc, g_argv, "include-dir");
2603
2604 // then process includes
2605 std::vector<std::string> extraConfigs;
242b90e1
AT
2606 ::arg().gatherIncludes(extraConfigs);
2607
1dc8f4d0 2608 for(const std::string& fn : extraConfigs) {
7e818521 2609 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2610 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2611 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2612 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2613 }
ca2c884c
AT
2614
2615 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2616 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2617 }
49a699c4 2618
f26bf547
RG
2619 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2620 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 2621
2c95fc65
BH
2622 if(!::arg()["allow-from-file"].empty()) {
2623 string line;
2c95fc65
BH
2624 ifstream ifs(::arg()["allow-from-file"].c_str());
2625 if(!ifs) {
9c61b9d0 2626 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2627 }
2628
2629 string::size_type pos;
2630 while(getline(ifs,line)) {
2631 pos=line.find('#');
2632 if(pos!=string::npos)
2633 line.resize(pos);
2634 trim(line);
2635 if(line.empty())
2636 continue;
2637
18af64a8 2638 allowFrom->addMask(line);
2c95fc65 2639 }
49a699c4 2640 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2641 }
2642 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2643 vector<string> ips;
2644 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2645
f7c1d4e3
BH
2646 L<<Logger::Warning<<"Only allowing queries from: ";
2647 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2648 allowFrom->addMask(*i);
f7c1d4e3 2649 if(i!=ips.begin())
674cf0f6 2650 L<<Logger::Warning<<", ";
f7c1d4e3
BH
2651 L<<Logger::Warning<<*i;
2652 }
2653 L<<Logger::Warning<<endl;
2654 }
49a699c4 2655 else {
3ddb9247 2656 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
49a699c4 2657 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 2658 allowFrom = nullptr;
49a699c4 2659 }
3ddb9247 2660
49a699c4 2661 g_initialAllowFrom = allowFrom;
d7dae798 2662 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 2663 oldAllowFrom = nullptr;
3ddb9247 2664
49a699c4 2665 l_initialized = true;
18af64a8
BH
2666}
2667
795215f2 2668
756e82cf 2669static void setupDelegationOnly()
2670{
2671 vector<string> parts;
2672 stringtok(parts, ::arg()["delegation-only"], ", \t");
2673 for(const auto& p : parts) {
9065eb05 2674 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 2675 }
2676}
795215f2 2677
8fd25133
RG
2678static std::map<unsigned int, std::set<int> > parseCPUMap()
2679{
2680 std::map<unsigned int, std::set<int> > result;
2681
2682 const std::string value = ::arg()["cpu-map"];
2683
2684 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
2685 L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
2686 return result;
2687 }
2688
2689 std::vector<std::string> parts;
2690
2691 stringtok(parts, value, " \t");
2692
2693 for(const auto& part : parts) {
2694 if (part.find('=') == string::npos)
2695 continue;
2696
2697 try {
2698 auto headers = splitField(part, '=');
2699 trim(headers.first);
2700 trim(headers.second);
2701
2702 unsigned int threadId = pdns_stou(headers.first);
2703 std::vector<std::string> cpus;
2704
2705 stringtok(cpus, headers.second, ",");
2706
2707 for(const auto& cpu : cpus) {
2708 int cpuId = std::stoi(cpu);
2709
2710 result[threadId].insert(cpuId);
2711 }
2712 }
2713 catch(const std::exception& e) {
2714 L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
2715 }
2716 }
2717
2718 return result;
2719}
2720
2721static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
2722{
2723 const auto& cpuMapping = cpusMap.find(n);
2724 if (cpuMapping != cpusMap.cend()) {
2725 int rc = mapThreadToCPUList(tid, cpuMapping->second);
2726 if (rc == 0) {
2727 L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
2728 for (const auto cpu : cpuMapping->second) {
2729 L<<Logger::Info<<" "<<cpu;
2730 }
2731 L<<Logger::Info<<endl;
2732 }
2733 else {
2734 L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
2735 for (const auto cpu : cpuMapping->second) {
2736 L<<Logger::Info<<" "<<cpu;
2737 }
2738 L<<Logger::Info<<strerror(rc)<<endl;
2739 }
2740 }
2741}
2742
d187038c 2743static int serviceMain(int argc, char*argv[])
18af64a8 2744{
5124de27 2745 L.setName(s_programname);
b6cfa948 2746 L.disableSyslog(::arg().mustDo("disable-syslog"));
18af64a8
BH
2747
2748 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
2749 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2750 if(val >= 0)
2751 theL().setFacility(val);
18af64a8
BH
2752 else
2753 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2754 }
2755
ba1a571d 2756 showProductVersion();
18af64a8 2757 seedRandom(::arg()["entropy-source"]);
3afde9b2 2758
06ea9015 2759 g_disthashseed=dns_random(0xffffffff);
2760
b7ef5828
PL
2761 checkLinuxIPv6Limits();
2762 try {
2763 vector<string> addrs;
2764 if(!::arg()["query-local-address6"].empty()) {
2765 SyncRes::s_doIPv6=true;
2766 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2767
2768 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2769 for(const string& addr : addrs) {
2770 g_localQueryAddresses6.push_back(ComboAddress(addr));
2771 }
2772 }
2773 else {
2774 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2775 }
2776 addrs.clear();
2777 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2778 for(const string& addr : addrs) {
2779 g_localQueryAddresses4.push_back(ComboAddress(addr));
2780 }
2781 }
2782 catch(std::exception& e) {
2783 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2784 exit(99);
2785 }
2786
e48c6b8a
PL
2787 // keep this ABOVE loadRecursorLuaConfig!
2788 if(::arg()["dnssec"]=="off")
2789 g_dnssecmode=DNSSECMode::Off;
2790 else if(::arg()["dnssec"]=="process-no-validate")
2791 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2792 else if(::arg()["dnssec"]=="process")
2793 g_dnssecmode=DNSSECMode::Process;
2794 else if(::arg()["dnssec"]=="validate")
2795 g_dnssecmode=DNSSECMode::ValidateAll;
2796 else if(::arg()["dnssec"]=="log-fail")
2797 g_dnssecmode=DNSSECMode::ValidateForLog;
2798 else {
2799 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2800 exit(1);
2801 }
2802
2803 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 2804 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 2805
0f5785a6
PL
2806 try {
2807 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2808 }
2809 catch (PDNSException &e) {
2810 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2811 exit(1);
2812 }
ad42489c 2813
18af64a8 2814 parseACLs();
92011b8f 2815 sortPublicSuffixList();
2816
eb5bae86 2817 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
2818 vector<string> ips;
2819 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
2820 ips.push_back("0.0.0.0");
2821 ips.push_back("::");
c36bc97a 2822
eb5bae86
BH
2823 L<<Logger::Warning<<"Will not send queries to: ";
2824 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 2825 SyncRes::addDontQuery(*i);
eb5bae86 2826 if(i!=ips.begin())
4957a608 2827 L<<Logger::Warning<<", ";
eb5bae86
BH
2828 L<<Logger::Warning<<*i;
2829 }
2830 L<<Logger::Warning<<endl;
2831 }
2832
f7c1d4e3 2833 g_quiet=::arg().mustDo("quiet");
3ddb9247 2834
1bc3c142
BH
2835 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2836 if(g_weDistributeQueries) {
2837 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2838 }
3ddb9247 2839
756e82cf 2840 setupDelegationOnly();
b33c2462 2841 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 2842
77499b05
BH
2843 if(::arg()["trace"]=="fail") {
2844 SyncRes::setDefaultLogMode(SyncRes::Store);
2845 }
2846 else if(::arg().mustDo("trace")) {
2847 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
2848 ::arg().set("quiet")="no";
2849 g_quiet=false;
3e9c6c0a 2850 g_dnssecLOG=true;
f7c1d4e3 2851 }
3ddb9247 2852
aadceba8 2853 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2854
1051f8a9
BH
2855 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2856
f7c1d4e3 2857 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
63637fd8 2858 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 2859 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
2860 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2861 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2862 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
2863 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2864 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 2865 SyncRes::s_serverID=::arg()["server-id"];
173d790e 2866 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 2867 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 2868 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 2869 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
2870 if(SyncRes::s_serverID.empty()) {
2871 char tmp[128];
2872 gethostname(tmp, sizeof(tmp)-1);
2873 SyncRes::s_serverID=tmp;
2874 }
3ddb9247 2875
e9f9b8ec
RG
2876 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
2877 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
2878
5b0ddd18 2879 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 2880
49a699c4 2881 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 2882
08f3f638 2883 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 2884
f7c1d4e3 2885 g_logCommonErrors=::arg().mustDo("log-common-errors");
e661a20b
PD
2886
2887 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
2888 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2889
b3adda56
PD
2890 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
2891
810ff705
RG
2892 g_numWorkerThreads = ::arg().asNum("threads");
2893 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
2894 g_maxMThreads = ::arg().asNum("max-mthreads");
2895
00b8cadc
RG
2896 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
2897
0ec489bf 2898 g_statisticsInterval = ::arg().asNum("statistics-interval");
2899
810ff705
RG
2900#ifdef SO_REUSEPORT
2901 g_reusePort = ::arg().mustDo("reuseport");
2902#endif
2903
2904 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
2905
2906 if (g_useOneSocketPerThread) {
2907 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
2908 makeUDPServerSockets(threadId);
2909 makeTCPServerSockets(threadId);
2910 }
2911 }
2912 else {
2913 makeUDPServerSockets(0);
2914 makeTCPServerSockets(0);
2915 }
815099b2 2916
9065eb05 2917 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
b40562da 2918 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
376effcf 2919
677e2a46
BH
2920 int forks;
2921 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
2922 if(!fork()) // we are child
2923 break;
2924 }
3ddb9247 2925
f7c1d4e3
BH
2926 if(::arg().mustDo("daemon")) {
2927 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
2928 L.toConsole(Logger::Critical);
f7c1d4e3 2929 daemonize();
a4241908 2930 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
f7c1d4e3
BH
2931 }
2932 signal(SIGUSR1,usr1Handler);
2933 signal(SIGUSR2,usr2Handler);
2934 signal(SIGPIPE,SIG_IGN);
810ff705 2935
a6414fdc 2936 checkOrFixFDS();
3ddb9247 2937
d1b28475
KM
2938#ifdef HAVE_LIBSODIUM
2939 if (sodium_init() == -1) {
2940 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
2941 exit(99);
2942 }
2943#endif
2944
3afde9b2
PL
2945 openssl_thread_setup();
2946 openssl_seed();
2947
138435cb
BH
2948 int newgid=0;
2949 if(!::arg()["setgid"].empty())
2950 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
2951 int newuid=0;
2952 if(!::arg()["setuid"].empty())
2953 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
2954
f1d6a7ce
KM
2955 Utility::dropGroupPrivs(newuid, newgid);
2956
138435cb 2957 if (!::arg()["chroot"].empty()) {
75336810
PL
2958#ifdef HAVE_SYSTEMD
2959 char *ns;
2960 ns = getenv("NOTIFY_SOCKET");
2961 if (ns != nullptr) {
2962 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
2963 exit(1);
2964 }
2965#endif
138435cb
BH
2966 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
2967 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
2968 exit(1);
2969 }
f0f3f0b0
PL
2970 else
2971 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
2972 }
2973
f0f3f0b0
PL
2974 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
2975 if(!s_pidfname.empty())
2976 unlink(s_pidfname.c_str()); // remove possible old pid file
2977 writePid();
2978
2979 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
2980
f1d6a7ce 2981 Utility::dropUserPrivs(newuid);
c0063e60 2982
49a699c4 2983 makeThreadPipes();
3ddb9247 2984
5d4dd7fe
BH
2985 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2986 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 2987 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
343257a4 2988
d705aad9
RG
2989 if (::arg().mustDo("snmp-agent")) {
2990 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
2991 g_snmpAgent->run();
2992 }
2993
8fd25133 2994 const auto cpusMap = parseCPUMap();
c3828c03 2995 if(g_numThreads == 1) {
76698c6e 2996 L<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
2997#ifdef HAVE_SYSTEMD
2998 sd_notify(0, "READY=1");
2999#endif
8fd25133 3000 setCPUMap(cpusMap, 0, pthread_self());
76698c6e
BH
3001 recursorThread(0);
3002 }
3003 else {
3004 pthread_t tid;
c3828c03
BH
3005 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
3006 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 3007 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
8fd25133
RG
3008
3009 setCPUMap(cpusMap, n, tid);
76698c6e
BH
3010 }
3011 void* res;
6b6720de
PL
3012#ifdef HAVE_SYSTEMD
3013 sd_notify(0, "READY=1");
3014#endif
76698c6e 3015 pthread_join(tid, &res);
bb4bdbaf 3016 }
bb4bdbaf
BH
3017 return 0;
3018}
3019
d187038c 3020static void* recursorThread(void* ptr)
bb4bdbaf
BH
3021try
3022{
2e2cd8ec 3023 t_id=(int) (long) ptr;
49a699c4 3024 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 3025 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 3026 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
3027 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3028 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 3029 primeHints();
3ddb9247 3030
f26bf547 3031 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 3032
aa7929a3 3033#ifdef HAVE_PROTOBUF
f26bf547 3034 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
aa7929a3 3035#endif
49a699c4 3036 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 3037
674cf0f6
BH
3038 try {
3039 if(!::arg()["lua-dns-script"].empty()) {
f26bf547 3040 t_pdl = std::make_shared<RecursorLua4>(::arg()["lua-dns-script"]);
674cf0f6
BH
3041 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3042 }
674cf0f6
BH
3043 }
3044 catch(std::exception &e) {
3045 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 3046 _exit(99);
674cf0f6 3047 }
3ddb9247 3048
f8f243b0 3049 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 3050 if(ringsize) {
f26bf547 3051 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
f8f243b0 3052 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 3053 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 3054 else
3ddb9247 3055 t_remotes->set_capacity(ringsize);
f26bf547 3056 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3057 t_servfailremotes->set_capacity(ringsize);
f26bf547 3058 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3059 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 3060
f26bf547 3061 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3062 t_queryring->set_capacity(ringsize);
f26bf547 3063 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3064 t_servfailqueryring->set_capacity(ringsize);
92011b8f 3065 }
3ddb9247 3066
f26bf547 3067 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3ddb9247 3068
bb4bdbaf
BH
3069 PacketID pident;
3070
3071 t_fdm=getMultiplexer();
f3d1d67b 3072 if(!t_id) {
d07bf7ff 3073 if(::arg().mustDo("webserver")) {
30a1aa92 3074 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 3075 try {
1ce57618 3076 new RecursorWebServer(t_fdm);
8989097d
CH
3077 }
3078 catch(PDNSException &e) {
3079 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
3080 exit(99);
3081 }
f3d1d67b 3082 }
83252304 3083 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 3084 }
83252304 3085
49a699c4 3086 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 3087
810ff705 3088 if(g_useOneSocketPerThread) {
2573d4a6
RG
3089 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
3090 t_fdm->addReadFD(i->first, i->second);
810ff705
RG
3091 }
3092 }
3093 else {
3094 if(!g_weDistributeQueries || !t_id) { // if we distribute queries, only t_id = 0 listens
0670917b 3095 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
810ff705
RG
3096 t_fdm->addReadFD(i->first, i->second);
3097 }
3098 }
3099 }
3ddb9247 3100
b0b37121 3101 registerAllStats();
674cf0f6 3102 if(!t_id) {
674cf0f6
BH
3103 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3104 }
1bc3c142 3105
f7c1d4e3 3106 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 3107
f7c1d4e3 3108 bool listenOnTCP(true);
49a699c4 3109
2c78bd57 3110 time_t last_carbon=0;
3111 time_t carbonInterval=::arg().asNum("carbon-interval");
ac0995bb 3112 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 3113 for(;;) {
ac0e821b 3114 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 3115
3427fa8a
BH
3116 if(!(counter%500)) {
3117 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
3118 }
3119
d2392145 3120 if(!(counter%55)) {
d8f6d49f 3121 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 3122 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 3123
f7c1d4e3 3124 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 3125 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 3126 if(g_logCommonErrors)
cd989c87 3127 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 3128 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
3129 }
3130 }
3ddb9247 3131
f7c1d4e3
BH
3132 counter++;
3133
3427fa8a 3134 if(!t_id && statsWanted) {
f7c1d4e3
BH
3135 doStats();
3136 }
3137
3138 Utility::gettimeofday(&g_now, 0);
2c78bd57 3139
3140 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
3141 MT->makeThread(doCarbonDump, 0);
3142 last_carbon = g_now.tv_sec;
3143 }
3144
bb4bdbaf 3145 t_fdm->run(&g_now);
3ea54bf0 3146 // 'run' updates g_now for us
f7c1d4e3 3147
b8ef5c5c 3148 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 3149 if(listenOnTCP) {
3150 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3151 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3152 t_fdm->removeReadFD(*i);
3153 listenOnTCP=false;
3154 }
f7c1d4e3 3155 }
5c889cf5 3156 else {
3157 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3158 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3159 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3160 listenOnTCP=true;
3161 }
f7c1d4e3
BH
3162 }
3163 }
3164 }
3165}
3f81d239 3166catch(PDNSException &ae) {
bb4bdbaf
BH
3167 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3168 return 0;
3169}
3170catch(std::exception &e) {
3171 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3172 return 0;
3173}
3174catch(...) {
3175 L<<Logger::Error<<"any other exception in main: "<<endl;
3176 return 0;
3177}
3178
51e2144e 3179
3ddb9247 3180int main(int argc, char **argv)
288f4aa9 3181{
dbd23fc2
BH
3182 g_argc = argc;
3183 g_argv = argv;
5e3de507 3184 g_stats.startupTime=time(0);
3e135495 3185 versionSetProduct(ProductRecursor);
8a63d3ce 3186 reportBasicTypes();
0007c2e5 3187 reportOtherTypes();
ea634573 3188
22030c37 3189 int ret = EXIT_SUCCESS;
caa6eefa 3190
288f4aa9 3191 try {
f888311c 3192 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 3193 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 3194 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 3195 ::arg().set("local-port","port to listen on")="53";
32252594 3196 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 3197 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 3198 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 3199 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 3200 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
d3f809bf 3201 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 3202 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 3203 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 3204 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
22e0810c 3205 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
3206 ::arg().set("chroot","switch to chroot jail")="";
3207 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3208 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 3209 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 3210 ::arg().set("threads", "Launch this number of threads")="2";
adabfcb9 3211 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 3212 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 3213 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976
CH
3214 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3215 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3216 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3217 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
3218 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3219 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3220 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
69e7f117 3221 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
cc08b5a9 3222 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 3223 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 3224 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
0ec489bf 3225 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 3226 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 3227 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 3228 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
3229 ::arg().set("socket-owner","Owner of socket")="";
3230 ::arg().set("socket-group","Group of socket")="";
3231 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 3232
f0f3f0b0 3233 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
3234 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3235 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 3236 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 3237 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 3238 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 3239 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 3240 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 3241 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 3242 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 3243 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 3244 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 3245 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 3246 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 3247 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 3248 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
7f7b8d55 3249 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
92011b8f 3250 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 3251 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 3252 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 3253 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 3254 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 3255 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 3256 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 3257 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 3258 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 3259 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 3260 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 3261 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 3262
5605c067 3263 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
3264 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3265 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 3266 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 3267 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 3268 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 3269 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 3270 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 3271 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 3272 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 3273 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3274 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3f975863 3275 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
a16c4536 3276 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 3277 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 3278 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 3279 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 3280 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 3281 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
a09a8ce0 3282 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 3283 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 3284 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 3285 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 3286 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 3287 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
a09a8ce0 3288
68e6df3c 3289 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 3290 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 3291
3292 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 3293
d705aad9 3294 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 3295 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 3296
0735b17e 3297 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 3298 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 3299
8fd25133
RG
3300 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3301
2e3d8a19 3302 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 3303 ::arg().setCmd("version","Print version string");
d5141417 3304 ::arg().setCmd("config","Output blank configuration");
f27e6356 3305 L.toConsole(Logger::Info);
2e3d8a19 3306 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 3307
2d733c0f
CH
3308 string configname=::arg()["config-dir"]+"/recursor.conf";
3309 if(::arg()["config-name"]!="") {
3310 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 3311 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
3312 }
3313 cleanSlashes(configname);
5124de27 3314
577cf284
BH
3315 if(::arg().mustDo("config")) {
3316 cout<<::arg().configstring()<<endl;
3317 exit(0);
3318 }
3319
3ddb9247 3320 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
3321 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3322
2e3d8a19 3323 ::arg().parse(argc,argv);
c836dc19 3324
f0f3f0b0
PL
3325 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3326 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3327 exit(EXIT_FAILURE);
3328 }
3329
3330 if (::arg()["socket-dir"].empty()) {
3331 if (::arg()["chroot"].empty())
3332 ::arg().set("socket-dir") = LOCALSTATEDIR;
3333 else
3334 ::arg().set("socket-dir") = "/";
3335 }
3336
2e3d8a19 3337 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 3338
61d74169 3339 if(::arg().asNum("threads")==1)
3340 ::arg().set("pdns-distributes-queries")="no";
3341
2e3d8a19 3342 if(::arg().mustDo("help")) {
ff5ba4f9
WA
3343 cout<<"syntax:"<<endl<<endl;
3344 cout<<::arg().helpstring(::arg()["help"])<<endl;
3345 exit(0);
b636533b 3346 }
5e3de507 3347 if(::arg().mustDo("version")) {
ba1a571d 3348 showProductVersion();
3613a51c 3349 showBuildConfiguration();
67076869 3350 exit(0);
5e3de507 3351 }
b636533b 3352
34162f8f 3353 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 3354
34162f8f
CH
3355 if (logUrgency < Logger::Error)
3356 logUrgency = Logger::Error;
f48d7b65 3357 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3358 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3359 }
34162f8f
CH
3360 L.setLoglevel(logUrgency);
3361 L.toConsole(logUrgency);
3362
f7c1d4e3 3363 serviceMain(argc, argv);
288f4aa9 3364 }
3f81d239 3365 catch(PDNSException &ae) {
c836dc19 3366 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 3367 ret=EXIT_FAILURE;
288f4aa9 3368 }
fdbf35ac 3369 catch(std::exception &e) {
c836dc19 3370 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 3371 ret=EXIT_FAILURE;
288f4aa9
BH
3372 }
3373 catch(...) {
c836dc19 3374 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 3375 ret=EXIT_FAILURE;
288f4aa9 3376 }
3ddb9247 3377
22030c37 3378 return ret;
288f4aa9 3379}