]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Merge pull request #6117 from rgacogne/ddist-dns-over-tls
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
fa8fd4d2 29
2470b36e 30#include "ws-recursor.hh"
49a699c4 31#include <pthread.h>
3ea54bf0 32#include "recpacketcache.hh"
3ddb9247 33#include "utility.hh"
51e2144e 34#include "dns_random.hh"
d1b28475
KM
35#ifdef HAVE_LIBSODIUM
36#include <sodium.h>
37#endif
3afde9b2 38#include "opensslsigners.hh"
288f4aa9
BH
39#include <iostream>
40#include <errno.h>
81859ba5 41#include <boost/static_assert.hpp>
288f4aa9
BH
42#include <map>
43#include <set>
97bb160b 44#include "recursor_cache.hh"
38c9ceaa 45#include "cachecleaner.hh"
288f4aa9 46#include <stdio.h>
c75a6a9e 47#include <signal.h>
288f4aa9 48#include <stdlib.h>
bb4bdbaf 49#include "misc.hh"
288f4aa9
BH
50#include "mtasker.hh"
51#include <utility>
288f4aa9
BH
52#include "arguments.hh"
53#include "syncres.hh"
88def049
BH
54#include <fcntl.h>
55#include <fstream>
3e61e7f7 56#include "sortlist.hh"
5c633640
BH
57#include "sstuff.hh"
58#include <boost/tuple/tuple.hpp>
59#include <boost/tuple/tuple_comparison.hpp>
72df400f 60#include <boost/shared_array.hpp>
7f1fa77d 61#include <boost/function.hpp>
5605c067 62#include <boost/algorithm/string.hpp>
8f7473d7 63#ifdef MALLOC_TRACE
64#include "malloctrace.hh"
65#endif
40a3dd64 66#include <netinet/tcp.h>
ea634573
BH
67#include "dnsparser.hh"
68#include "dnswriter.hh"
69#include "dnsrecords.hh"
f814d7c8 70#include "zoneparser-tng.hh"
1d5b3ce6 71#include "rec_channel.hh"
aaacf7f2 72#include "logger.hh"
c8ddb7c2 73#include "iputils.hh"
09e6702a 74#include "mplexer.hh"
c038218b 75#include "config.h"
808c5ef7 76#include "lua-recursor4.hh"
ba1a571d 77#include "version.hh"
79332bff 78#include "responsestats.hh"
d67620e4 79#include "secpoll-recursor.hh"
c5c066bf 80#include "dnsname.hh"
644dd1da 81#include "filterpo.hh"
82#include "rpzloader.hh"
b3f0ed10 83#include "validate-recursor.hh"
f3c18728 84#include "rec-lua-conf.hh"
5c3b5e7f 85#include "ednsoptions.hh"
85c7ca75 86#include "gettime.hh"
f3c18728 87
d9d3f9c1 88#include "rec-protobuf.hh"
d705aad9 89#include "rec-snmp.hh"
aa7929a3 90
6b6720de
PL
91#ifdef HAVE_SYSTEMD
92#include <systemd/sd-daemon.h>
93#endif
94
d187038c
RG
95#include "namespaces.hh"
96
97typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
98
f26bf547
RG
99static thread_local std::shared_ptr<RecursorLua4> t_pdl;
100static thread_local unsigned int t_id;
101static thread_local std::shared_ptr<Regex> t_traceRegex;
102static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
103
104thread_local std::unique_ptr<MT_t> MT; // the big MTasker
105thread_local std::unique_ptr<MemRecursorCache> t_RC;
106thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 107thread_local FDMultiplexer* t_fdm{nullptr};
f26bf547
RG
108thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
109thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
110thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
aa7929a3 111#ifdef HAVE_PROTOBUF
f26bf547 112thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
aa7929a3 113#endif
d187038c 114__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798
BH
115
116// for communicating with our threads
49a699c4
BH
// The four pipe descriptors kept per worker thread (one entry of g_pipes).
// NOTE(review): the names suggest one pipe carries data towards the thread
// and the other carries data back from it -- confirm against the code that
// creates the pipes.
struct ThreadPipeSet
{
  int writeToThread;
  int readToThread;
  int writeFromThread;
  int readFromThread;
};
810ff705 124
d187038c
RG
125typedef vector<int> tcpListenSockets_t;
126typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
127typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
3ea54bf0 128
d187038c
RG
129static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
130static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
131static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
132static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
810ff705 133static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
d187038c
RG
134static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
135static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
136static AtomicCounter counter;
9065eb05 137static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 138static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
d187038c
RG
139static size_t g_tcpMaxQueriesPerConn;
140static uint64_t g_latencyStatSize;
141static uint32_t g_disthashseed;
142static unsigned int g_maxTCPPerClient;
143static unsigned int g_networkTimeoutMsec;
144static unsigned int g_maxMThreads;
145static unsigned int g_numWorkerThreads;
146static int g_tcpTimeout;
147static uint16_t g_udpTruncationThreshold;
148static std::atomic<bool> statsWanted;
149static std::atomic<bool> g_quiet;
150static bool g_logCommonErrors;
151static bool g_anyToTcp;
d187038c 152static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
810ff705
RG
153static bool g_reusePort{false};
154static bool g_useOneSocketPerThread;
00b8cadc 155static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 156static time_t g_statisticsInterval;
9065eb05 157static bool g_useIncomingECS;
a6f7f5fe 158std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
49a699c4 159
d187038c
RG
160RecursorControlChannel s_rcc; // only active in thread 0
161RecursorStats g_stats;
2d733c0f 162string s_programname="pdns_recursor";
d187038c 163string s_pidfname;
c1c29961 164bool g_lowercaseOutgoing;
d187038c
RG
165unsigned int g_numThreads;
166uint16_t g_outgoingEDNSBufsize;
98d36505 167bool g_logRPZChanges{false};
c3828c03 168
12cd44ee 169#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 170#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 171// Bad Nets taken from both:
3ddb9247 172// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 173// and
174// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
175// where such a network may not be considered a valid destination
176#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
177#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 178
d7dae798 179//! used to send information to a newborn mthread
ea634573 180struct DNSComboWriter {
27c0050c 181 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
232f0877 182 d_tcp(false), d_socket(-1)
ea634573
BH
183 {}
184 MOADNSParser d_mdp;
00c9b8c1 185 void setRemote(const ComboAddress* sa)
ea634573 186 {
37d3f960 187 d_remote=*sa;
ea634573
BH
188 }
189
b71b60ee 190 void setLocal(const ComboAddress& sa)
191 {
192 d_local=sa;
193 }
194
195
ea634573
BH
196 void setSocket(int sock)
197 {
198 d_socket=sock;
199 }
a1754c6a
BH
200
201 string getRemote() const
202 {
37d3f960 203 return d_remote.toString();
a1754c6a
BH
204 }
205
c9e9e5e0 206 struct timeval d_now;
b71b60ee 207 ComboAddress d_remote, d_local;
aa7929a3
RG
208#ifdef HAVE_PROTOBUF
209 boost::uuids::uuid d_uuid;
67e31ebe 210 string d_requestorId;
590388d2 211 string d_deviceId;
aa7929a3 212#endif
b40562da
RG
213 EDNSSubnetOpts d_ednssubnet;
214 bool d_ecsFound{false};
215 bool d_ecsParsed{false};
ea634573
BH
216 bool d_tcp;
217 int d_socket;
b673817a 218 unsigned int d_tag{0};
e9f63d47 219 uint32_t d_qhash{0};
49a3500d 220 string d_query;
cd989c87 221 shared_ptr<TCPConnection> d_tcpConnection;
e8340d27 222 vector<pair<uint16_t, string> > d_ednsOpts;
02b47f43 223 std::vector<std::string> d_policyTags;
5fd2577f 224 LuaContext::LuaObject d_data;
ea634573
BH
225};
226
06857845
RG
227MT_t* getMT()
228{
229 return MT ? MT.get() : nullptr;
230}
ea634573 231
288f4aa9
BH
232ArgvMap &arg()
233{
234 static ArgvMap theArg;
235 return theArg;
236}
4ef015cd 237
b4015453
RG
238unsigned int getRecursorThreadId()
239{
240 return t_id;
241}
09e6702a 242
30ee601a
RG
243int getMTaskerTID()
244{
245 return MT->getTid();
246}
247
d187038c 248static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 249
50c81227 250// -1 is error, 0 is timeout, 1 is success
3ddb9247 251int asendtcp(const string& data, Socket* sock)
5c633640
BH
252{
253 PacketID pident;
254 pident.sock=sock;
255 pident.outMSG=data;
3ddb9247 256
bb4bdbaf 257 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 258 string packet;
5c633640 259
5b0ddd18 260 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 261
9170fbaf 262 if(!ret || ret==-1) { // timeout
bb4bdbaf 263 t_fdm->removeWriteFD(sock->getHandle());
5c633640 264 }
50c81227
BH
265 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
266 return -1;
267 }
9170fbaf 268 return ret;
5c633640
BH
269}
270
d187038c 271static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 272
9170fbaf 273// -1 is error, 0 is timeout, 1 is success
a683e8bd 274int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 275{
50c81227 276 data.clear();
5c633640
BH
277 PacketID pident;
278 pident.sock=sock;
279 pident.inNeeded=len;
825fa717 280 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 281 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 282
bb4bdbaf 283 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 284 if(!ret || ret==-1) { // timeout
bb4bdbaf 285 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 286 }
50c81227
BH
287 else if(data.empty()) {// error, EOF or other
288 return -1;
289 }
290
9170fbaf 291 return ret;
288f4aa9
BH
292}
293
d187038c 294static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 295{
fba1e944 296 PacketID pident=*any_cast<PacketID>(&var);
4465e941 297 char resp[512];
a683e8bd 298 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
4465e941 299 t_fdm->removeReadFD(fd);
300 if(ret >= 0) {
a683e8bd 301 string data(resp, (size_t) ret);
fba1e944 302 MT->sendEvent(pident, &data);
4465e941 303 }
304 else {
fba1e944 305 string empty;
306 MT->sendEvent(pident, &empty);
307 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 308 }
309}
fba1e944 310string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 311{
4465e941 312 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
313 s.setNonBlocking();
314 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
315
316 s.bind(local);
317 s.connect(dest);
4465e941 318 s.send(query);
319
320 PacketID pident;
321 pident.sock=&s;
322 pident.type=0;
fba1e944 323 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 324
325 string data;
fba1e944 326
4465e941 327 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 328
4465e941 329 if(!ret || ret==-1) { // timeout
4465e941 330 t_fdm->removeReadFD(s.getHandle());
331 }
332 else if(data.empty()) {// error, EOF or other
fba1e944 333 // we could special case this
4465e941 334 return data;
335 }
4465e941 336 return data;
337}
338
d7dae798 339//! pick a random query local address
1652a63e 340ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 341{
1652a63e 342 ComboAddress ret;
5a38281c 343 if(family==AF_INET) {
3ddb9247 344 if(g_localQueryAddresses4.empty())
1652a63e 345 ret = g_local4;
3ddb9247 346 else
1652a63e
BH
347 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
348 ret.sin4.sin_port = htons(port);
5a38281c
BH
349 }
350 else {
351 if(g_localQueryAddresses6.empty())
1652a63e
BH
352 ret = g_local6;
353 else
354 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 355
1652a63e 356 ret.sin6.sin6_port = htons(port);
5a38281c 357 }
1652a63e 358 return ret;
5a38281c 359}
4ef015cd 360
d187038c 361static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 362
d187038c 363static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
364{
365 uint32_t psize=0;
366 socklen_t len=sizeof(psize);
3ddb9247 367
d7dae798
BH
368 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
369 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 370 return;
d7dae798
BH
371 }
372
373 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
c057bfaa 374 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
375}
376
377
378static void setSocketReceiveBuffer(int fd, uint32_t size)
379{
380 setSocketBuffer(fd, SO_RCVBUF, size);
381}
382
383static void setSocketSendBuffer(int fd, uint32_t size)
384{
385 setSocketBuffer(fd, SO_SNDBUF, size);
386}
387
388
4ef015cd
BH
389// you can ask this class for a UDP socket to send a query from
390// this socket is not yours, don't even think about deleting it
391// but after you call 'returnSocket' on it, don't assume anything anymore
392class UDPClientSocks
393{
4ef015cd 394 unsigned int d_numsocks;
4ef015cd 395public:
e2642526 396 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
397 {
398 }
399
996c89cc 400 typedef set<int> socks_t;
4ef015cd
BH
401 socks_t d_socks;
402
2ee280cf 403 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 404 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 405 {
d8f6d49f
BH
406 *fd=makeClientSocket(toaddr.sin4.sin_family);
407 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 408 return -2;
d8f6d49f
BH
409
410 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
411 int err = errno;
41ff43f8 412 // returnSocket(*fd);
a7b68ae7
RG
413 try {
414 closesocket(*fd);
415 }
416 catch(const PDNSException& e) {
417 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
418 }
419
d8f6d49f 420 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 421 return -2;
998a4334 422 return -1;
d8f6d49f 423 }
998a4334 424
d8f6d49f 425 d_socks.insert(*fd);
998a4334 426 d_numsocks++;
d8f6d49f 427 return 0;
4ef015cd
BH
428 }
429
095c3045
BH
430 void returnSocket(int fd)
431 {
432 socks_t::iterator i=d_socks.find(fd);
34801ab1 433 if(i==d_socks.end()) {
335da0ba 434 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 435 }
bb4bdbaf 436 returnSocketLocked(i);
095c3045
BH
437 }
438
4ef015cd 439 // return a socket to the pool, or simply erase it
bb4bdbaf 440 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 441 {
600fc20b 442 if(i==d_socks.end()) {
3f81d239 443 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 444 }
80baf329 445 try {
bb4bdbaf 446 t_fdm->removeReadFD(*i);
80baf329
BH
447 }
448 catch(FDMultiplexerException& e) {
bb4bdbaf 449 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 450 }
a7b68ae7
RG
451 try {
452 closesocket(*i);
453 }
454 catch(const PDNSException& e) {
455 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
456 }
3ddb9247 457
998a4334
BH
458 d_socks.erase(i++);
459 --d_numsocks;
4ef015cd 460 }
d8f6d49f
BH
461
462 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 463 static int makeClientSocket(int family)
d8f6d49f 464 {
a683e8bd 465 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 466
d8f6d49f
BH
467 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
468 return ret;
3ddb9247
PD
469
470 if(ret<0)
335da0ba 471 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 472
7eb73ffa 473 // setCloseOnExec(ret); // we're not going to exec
5a38281c 474
d8f6d49f 475 int tries=10;
3aa91c3e 476 ComboAddress sin;
d8f6d49f 477 while(--tries) {
1652a63e 478 uint16_t port;
3ddb9247 479
d8f6d49f 480 if(tries==1) // fall back to kernel 'random'
4957a608 481 port = 0;
1652a63e
BH
482 else
483 port = 1025 + dns_random(64510);
5a38281c 484
3aa91c3e 485 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 486
3ddb9247 487 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 488 break;
d8f6d49f
BH
489 }
490 if(!tries)
3aa91c3e 491 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 492
3897b9e1 493 setNonBlocking(ret);
d8f6d49f
BH
494 return ret;
495 }
49a699c4
BH
496};
497
f26bf547 498static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 499
288f4aa9 500/* these two functions are used by LWRes */
34801ab1 501// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 502int asendto(const char *data, size_t len, int flags,
3ddb9247 503 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 504{
34801ab1
BH
505
506 PacketID pident;
787e5eab
BH
507 pident.domain = domain;
508 pident.remote = toaddr;
509 pident.type = qtype;
34801ab1
BH
510
511 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
512 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
513
514 for(; chain.first != chain.second; chain.first++) {
515 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 516 /*
4665c31e
BH
517 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
518 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 519 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 520 */
34801ab1
BH
521 chain.first->key.chain.insert(id); // we can chain
522 *fd=-1; // gets used in waitEvent / sendEvent later on
523 return 1;
524 }
525 }
526
49a699c4 527 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
528 if(ret < 0)
529 return ret;
34801ab1 530
998a4334
BH
531 pident.fd=*fd;
532 pident.id=id;
3ddb9247 533
bb4bdbaf
BH
534 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
535 ret = send(*fd, data, len, 0);
536
5b0ddd18 537 int tmp = errno;
bb4bdbaf 538
7302ed0a 539 if(ret < 0)
49a699c4 540 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 541
5b0ddd18 542 errno = tmp; // this is for logging purposes only
7302ed0a 543 return ret;
288f4aa9
BH
544}
545
9170fbaf 546// -1 is error, 0 is timeout, 1 is success
a683e8bd 547int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 548 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 549{
0d5f0a9f 550 static optional<unsigned int> nearMissLimit;
3ddb9247 551 if(!nearMissLimit)
0d5f0a9f
BH
552 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
553
288f4aa9 554 PacketID pident;
4ef015cd 555 pident.fd=fd;
288f4aa9 556 pident.id=id;
0d5f0a9f 557 pident.domain=domain;
787e5eab 558 pident.type = qtype;
996c89cc 559 pident.remote=fromaddr;
b636533b 560
288f4aa9 561 string packet;
5b0ddd18 562 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 563
9170fbaf 564 if(ret > 0) {
996c89cc 565 if(packet.empty()) // means "error"
3ddb9247 566 return -1;
998a4334 567
a683e8bd 568 *d_len=packet.size();
9170fbaf 569 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 570 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 571 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 572 g_stats.spoofCount++;
35ce8576
BH
573 return -1;
574 }
288f4aa9 575 }
09e6702a 576 else {
34801ab1 577 if(fd >= 0)
49a699c4 578 t_udpclientsocks->returnSocket(fd);
09e6702a 579 }
9170fbaf 580 return ret;
288f4aa9
BH
581}
582
88def049
BH
583static void writePid(void)
584{
191f2e47 585 if(!::arg().mustDo("write-pid"))
586 return;
18e7758c 587 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 588 if(of)
705f31ae 589 of<< Utility::getpid() <<endl;
88def049 590 else
c057bfaa 591 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
592}
593
cd989c87 594TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
595{
596 ++s_currentConnections;
cd989c87 597 (*t_tcpClientCounts)[d_remote]++;
0e408828 598}
cd989c87
BH
599
600TCPConnection::~TCPConnection()
0e408828 601{
a7b68ae7
RG
602 try {
603 if(closesocket(d_fd) < 0)
604 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
605 }
606 catch(const PDNSException& e) {
607 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
608 }
609
3ddb9247 610 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 611 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 612 --s_currentConnections;
0e408828 613}
0e9d9ce2 614
3ddb9247 615AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
616
617static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 618
92011b8f 619// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 620static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 621{
92011b8f 622 if(packetsize > 1000 && t_largeanswerremotes)
623 t_largeanswerremotes->push_back(remote);
2cc13433
BH
624 switch(res) {
625 case RCode::ServFail:
92011b8f 626 if(t_servfailremotes) {
627 t_servfailremotes->push_back(remote);
5af86fdc 628 if(query && t_servfailqueryring) // packet cache
92011b8f 629 t_servfailqueryring->push_back(make_pair(*query, qtype));
630 }
2cc13433
BH
631 g_stats.servFails++;
632 break;
633 case RCode::NXDomain:
634 g_stats.nxDomains++;
635 break;
636 case RCode::NoError:
637 g_stats.noErrors++;
638 break;
639 }
640}
641
a903b39c 642static string makeLoginfo(DNSComboWriter* dc)
643try
644{
5ad5bb7d 645 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
a903b39c 646}
647catch(...)
648{
649 return "Exception making error message for exception";
650}
651
aa7929a3 652#ifdef HAVE_PROTOBUF
590388d2 653static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 654{
e1c8a4bb
RG
655 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
656 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
657 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
a94bc5d7 658 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 659 message.setRequestorId(requestorId);
590388d2 660 message.setDeviceId(deviceId);
02b47f43 661
02b47f43 662 if (!policyTags.empty()) {
d9d3f9c1 663 message.setPolicyTags(policyTags);
02b47f43 664 }
aa7929a3 665
d9d3f9c1 666// cerr <<message.toDebugString()<<endl;
aa7929a3 667 std::string str;
d9d3f9c1 668 message.serialize(str);
aa7929a3 669 logger->queueData(str);
aa7929a3
RG
670}
671
d9d3f9c1 672static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
aa7929a3 673{
d9d3f9c1 674// cerr <<message.toDebugString()<<endl;
aa7929a3 675 std::string str;
d9d3f9c1 676 message.serialize(str);
aa7929a3 677 logger->queueData(str);
aa7929a3
RG
678}
679#endif
680
53508135
PL
681/**
682 * Chases the CNAME provided by the PolicyCustom RPZ policy.
683 *
684 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
685 * @param qtype: The QType of the original query
686 * @param sr: A SyncRes
687 * @param res: An integer that will contain the RCODE of the lookup we do
688 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
689 */
d187038c 690static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
691{
692 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
693 bool oldWantsRPZ = sr.getWantsRPZ();
694 sr.setWantsRPZ(false);
53508135
PL
695 vector<DNSRecord> ans;
696 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
697 for (const auto& rec : ans) {
698 if(rec.d_place == DNSResourceRecord::ANSWER) {
699 ret.push_back(rec);
700 }
701 }
702 // Reset the RPZ state of the SyncRes
30ee601a 703 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
704 }
705}
706
97c6d7e5
RG
707static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, const uint16_t maxAnswerSize)
708{
709 pw.startRecord(rec.d_name, rec.d_type, rec.d_ttl, rec.d_class, rec.d_place);
710
711 if(rec.d_type != QType::OPT) // their TTL ain't real
712 minTTL = min(minTTL, rec.d_ttl);
713
714 rec.d_content->toPacket(pw);
715 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
716 pw.rollback();
717 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
718 pw.getHeader()->tc=1;
719 pw.truncate();
720 }
721 return false;
722 }
723
724 return true;
725}
726
d187038c 727static void startDoResolve(void *p)
288f4aa9 728{
7b1469bb 729 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 730 try {
5af86fdc
RG
731 if (t_queryring)
732 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 733
32015748 734 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 735 EDNSOpts edo;
8e079f3a 736 bool haveEDNS=false;
737 if(getEDNSOpts(dc->d_mdp, &edo)) {
32015748
RG
738 if(!dc->d_tcp) {
739 /* rfc6891 6.2.3:
740 "Values lower than 512 MUST be treated as equal to 512."
741 */
742 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
743 }
e8340d27 744 dc->d_ednsOpts = edo.d_options;
8e079f3a 745 haveEDNS=true;
b40562da
RG
746
747 if (g_useIncomingECS && !dc->d_ecsParsed) {
748 for (const auto& o : edo.d_options) {
749 if (o.first == EDNSOptionCode::ECS) {
750 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
751 break;
752 }
753 }
754 }
10321a98 755 }
b40562da
RG
756 /* perhaps there was no EDNS or no ECS but by now we looked */
757 dc->d_ecsParsed = true;
e325f20c 758 vector<DNSRecord> ret;
ea634573 759 vector<uint8_t> packet;
b23b8614 760
ad42489c 761 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
762 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
763 bool wantsRPZ(true);
d9d3f9c1 764 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
aa7929a3 765#ifdef HAVE_PROTOBUF
d9d3f9c1 766 if (luaconfsLocal->protobufServer) {
e1c8a4bb
RG
767 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
768 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
769 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
b40562da 770 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
771 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
772 }
773#endif /* HAVE_PROTOBUF */
ad42489c 774
3ddb9247 775 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
776
777 pw.getHeader()->aa=0;
778 pw.getHeader()->ra=1;
c154c8a4 779 pw.getHeader()->qr=1;
bb4bdbaf 780 pw.getHeader()->tc=0;
ea634573 781 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 782 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 783 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 784
904d3219
PD
785 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
786
787 SyncRes sr(dc->d_now);
0c43f455 788
2e921ec6 789 bool DNSSECOK=false;
3457a2a0 790 if(t_pdl) {
f26bf547 791 sr.setLuaEngine(t_pdl);
3457a2a0 792 }
9eec8c98 793 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 794 sr.setDoDNSSEC(true);
9eec8c98
PL
795
796 // Does the requestor want DNSSEC records?
797 if(edo.d_Z & EDNSOpts::DNSSECOK) {
798 DNSSECOK=true;
799 g_stats.dnssecQueries++;
800 }
801 } else {
802 // Ignore the client-set CD flag
803 pw.getHeader()->cd=0;
5b9853c9 804 }
0c43f455
RG
805 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
806
4898a348 807#ifdef HAVE_PROTOBUF
30ee601a 808 sr.setInitialRequestId(dc->d_uuid);
4898a348 809#endif
0c43f455 810
2fe3354d 811 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 812
904d3219
PD
813 bool tracedQuery=false; // we could consider letting Lua know about this too
814 bool variableAnswer = false;
9fc36e90 815 bool shouldNotValidate = false;
904d3219 816
ef3b6cd7
RG
817 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
818 int res = RCode::NoError;
1f1ca368 819 DNSFilterEngine::Policy appliedPolicy;
39ec5d29 820 DNSRecord spoofed;
6e505c5e
RG
821 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
822 dq.ednsFlags = &edo.d_Z;
823 dq.ednsOptions = &dc->d_ednsOpts;
824 dq.tag = dc->d_tag;
825 dq.discardedPolicies = &sr.d_discardedPolicies;
826 dq.policyTags = &dc->d_policyTags;
827 dq.appliedPolicy = &appliedPolicy;
828 dq.currentRecords = &ret;
829 dq.dh = &dc->d_mdp.d_header;
05c74122 830 dq.data = dc->d_data;
67e31ebe
RG
831#ifdef HAVE_PROTOBUF
832 dq.requestorId = dc->d_requestorId;
590388d2 833 dq.deviceId = dc->d_deviceId;
67e31ebe 834#endif
ba21fcfe 835
e661a20b 836 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
837 pw.getHeader()->tc = 1;
838 res = 0;
839 variableAnswer = true;
e661a20b
PD
840 goto sendit;
841 }
842
f26bf547 843 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
844 sr.setLogMode(SyncRes::Store);
845 tracedQuery=true;
846 }
3ddb9247 847
8f7473d7 848
976ec823 849 if(!g_quiet || tracedQuery) {
461df9d2 850 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 851 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da
RG
852 if(!dc->d_ednssubnet.source.empty()) {
853 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 854 }
976ec823 855 L<<endl;
856 }
c75a6a9e 857
fededf47 858 sr.setId(MT->getTid());
67828389 859 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
860 sr.setCacheOnly();
861
f26bf547
RG
862 if (t_pdl) {
863 t_pdl->prerpz(dq, res);
0a273054
RG
864 }
865
db486de5 866 // Check if the query has a policy attached to it
0a273054 867 if (wantsRPZ) {
1f1ca368 868 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
0a273054 869 }
644dd1da 870
54be222b 871 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 872 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 873
30ee601a 874 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
875 if(wantsRPZ) {
876 switch(appliedPolicy.d_kind) {
877 case DNSFilterEngine::PolicyKind::NoAction:
878 break;
879 case DNSFilterEngine::PolicyKind::Drop:
880 g_stats.policyDrops++;
7a25883a 881 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
882 delete dc;
883 dc=0;
884 return;
885 case DNSFilterEngine::PolicyKind::NXDOMAIN:
886 g_stats.policyResults[appliedPolicy.d_kind]++;
887 res=RCode::NXDomain;
888 goto haveAnswer;
889 case DNSFilterEngine::PolicyKind::NODATA:
890 g_stats.policyResults[appliedPolicy.d_kind]++;
891 res=RCode::NoError;
db486de5 892 goto haveAnswer;
b8470add
PL
893 case DNSFilterEngine::PolicyKind::Custom:
894 g_stats.policyResults[appliedPolicy.d_kind]++;
895 res=RCode::NoError;
a9e029ee 896 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 897 ret.push_back(spoofed);
53508135 898 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
899 goto haveAnswer;
900 case DNSFilterEngine::PolicyKind::Truncate:
901 if(!dc->d_tcp) {
902 g_stats.policyResults[appliedPolicy.d_kind]++;
903 res=RCode::NoError;
904 pw.getHeader()->tc=1;
905 goto haveAnswer;
906 }
907 break;
908 }
db486de5
PL
909 }
910
b8470add 911 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
912 try {
913 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 914 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
915 }
916 catch(ImmediateServFailException &e) {
854d44e3 917 if(g_logCommonErrors)
918 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
919 res = RCode::ServFail;
920 }
4485aa35 921
1921a4c2
RG
922 dq.validationState = sr.getValidationState();
923
b8470add
PL
924 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
925 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
926 appliedPolicy = sr.d_appliedPolicy;
927 g_stats.policyResults[appliedPolicy.d_kind]++;
928 switch(appliedPolicy.d_kind) {
929 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
930 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
931 case DNSFilterEngine::PolicyKind::Drop:
932 g_stats.policyDrops++;
933 delete dc;
934 dc=0;
935 return;
936 case DNSFilterEngine::PolicyKind::NXDOMAIN:
937 ret.clear();
938 res=RCode::NXDomain;
939 goto haveAnswer;
940
941 case DNSFilterEngine::PolicyKind::NODATA:
942 ret.clear();
943 res=RCode::NoError;
944 goto haveAnswer;
945
946 case DNSFilterEngine::PolicyKind::Truncate:
947 if(!dc->d_tcp) {
948 ret.clear();
949 res=RCode::NoError;
950 pw.getHeader()->tc=1;
951 goto haveAnswer;
952 }
953 break;
954
955 case DNSFilterEngine::PolicyKind::Custom:
956 ret.clear();
957 res=RCode::NoError;
a9e029ee 958 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 959 ret.push_back(spoofed);
53508135 960 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
961 goto haveAnswer;
962 }
963 }
964
965 if (wantsRPZ) {
1f1ca368 966 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 967 }
db486de5 968
f26bf547 969 if(t_pdl) {
db486de5
PL
970 if(res == RCode::NoError) {
971 auto i=ret.cbegin();
972 for(; i!= ret.cend(); ++i)
973 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
974 break;
f26bf547 975 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
976 shouldNotValidate = true;
977
db486de5 978 }
f26bf547 979 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 980 shouldNotValidate = true;
db486de5 981
f26bf547 982 if(t_pdl->postresolve(dq, res))
3ca4e735 983 shouldNotValidate = true;
db486de5
PL
984 }
985
b8470add
PL
986 if (wantsRPZ) { //XXX This block is repeated, see above
987 g_stats.policyResults[appliedPolicy.d_kind]++;
988 switch(appliedPolicy.d_kind) {
989 case DNSFilterEngine::PolicyKind::NoAction:
990 break;
991 case DNSFilterEngine::PolicyKind::Drop:
992 g_stats.policyDrops++;
993 delete dc;
994 dc=0;
995 return;
996 case DNSFilterEngine::PolicyKind::NXDOMAIN:
997 ret.clear();
998 res=RCode::NXDomain;
999 goto haveAnswer;
1000
1001 case DNSFilterEngine::PolicyKind::NODATA:
1002 ret.clear();
1003 res=RCode::NoError;
1004 goto haveAnswer;
1005
1006 case DNSFilterEngine::PolicyKind::Truncate:
1007 if(!dc->d_tcp) {
1008 ret.clear();
1009 res=RCode::NoError;
1010 pw.getHeader()->tc=1;
1011 goto haveAnswer;
1012 }
1013 break;
1014
1015 case DNSFilterEngine::PolicyKind::Custom:
1016 ret.clear();
1017 res=RCode::NoError;
a9e029ee 1018 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1019 ret.push_back(spoofed);
53508135 1020 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1021 goto haveAnswer;
1022 }
644dd1da 1023 }
4485aa35 1024 }
644dd1da 1025 haveAnswer:;
3e8216c8 1026 if(res == PolicyDecision::DROP) {
e9c2ad3a 1027 g_stats.policyDrops++;
ae7e77ad 1028 delete dc;
1029 dc=0;
1030 return;
3ddb9247 1031 }
9cdfab64 1032 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1033 {
85ffbc53
PD
1034 string trace(sr.getTrace());
1035 if(!trace.empty()) {
1036 vector<string> lines;
1037 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1038 for(const string& line : lines) {
85ffbc53
PD
1039 if(!line.empty())
1040 L<<Logger::Warning<< line << endl;
1041 }
1042 }
1043 }
3ddb9247 1044
9cdfab64 1045 if(res == -1) {
0fe1d080
PD
1046 pw.getHeader()->rcode=RCode::ServFail;
1047 // no commit here, because no record
1048 g_stats.servFails++;
1049 }
288f4aa9 1050 else {
ea634573 1051 pw.getHeader()->rcode=res;
92011b8f 1052
f3fe4ae6 1053 // Does the validation mode or query demand validation?
0c43f455 1054 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1055 try {
f3fe4ae6 1056 if(sr.doLog()) {
5fc44cd2 1057 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
2e921ec6 1058 }
4d2be65d
RG
1059
1060 auto state = sr.getValidationState();
1061
b25cae9a 1062 if(state == Secure) {
2e921ec6 1063 if(sr.doLog()) {
5fc44cd2 1064 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
2e921ec6 1065 }
b25cae9a 1066
1067 // Is the query source interested in the value of the ad-bit?
885c8881 1068 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1069 pw.getHeader()->ad=1;
1070 }
1071 else if(state == Insecure) {
f3fe4ae6 1072 if(sr.doLog()) {
5fc44cd2 1073 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
12ce523e 1074 }
b25cae9a 1075
1076 pw.getHeader()->ad=0;
f3fe4ae6 1077 }
b25cae9a 1078 else if(state == Bogus) {
c87e1876 1079 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
5fc44cd2 1080 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
b25cae9a 1081 }
1082
1083 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1084 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1085 if(sr.doLog()) {
5fc44cd2 1086 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1087 }
1088
1089 pw.getHeader()->rcode=RCode::ServFail;
1090 goto sendit;
1091 } else {
1092 if(sr.doLog()) {
5fc44cd2 1093 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1094 }
1095 }
1096 }
1097 }
1098 catch(ImmediateServFailException &e) {
1099 if(g_logCommonErrors)
5fc44cd2 1100 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1101 pw.getHeader()->rcode=RCode::ServFail;
1102 goto sendit;
f3fe4ae6 1103 }
b3f0ed10 1104 }
1105
c154c8a4 1106 if(ret.size()) {
92476c8b 1107 orderAndShuffle(ret);
ad42489c 1108 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
20d84f77 1109 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1110 variableAnswer=true;
1111 }
8e079f3a 1112 }
0afa32d4
RG
1113
1114 bool needCommit = false;
8e079f3a 1115 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1116 if( ! DNSSECOK &&
1117 ( i->d_type == QType::NSEC3 ||
1118 (
1119 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1120 (
1121 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1122 i->d_place != DNSResourceRecord::ANSWER
1123 )
1124 )
1125 )
1126 ) {
2e921ec6 1127 continue;
3e80ebce
KM
1128 }
1129
97c6d7e5
RG
1130 if (!addRecordToPacket(pw, *i, minTTL, maxanswersize)) {
1131 needCommit = false;
1132 break;
1133 }
1134 needCommit = true;
1135
aa7929a3 1136#ifdef HAVE_PROTOBUF
d9d3f9c1
RG
1137 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1138 pbMessage.addRR(*i);
aa7929a3
RG
1139 }
1140#endif
ea634573 1141 }
0afa32d4 1142 if(needCommit)
8e079f3a 1143 pw.commit();
288f4aa9 1144 }
10321a98 1145 sendit:;
b3f0ed10 1146
97c6d7e5
RG
1147 if (haveEDNS) {
1148 /* we try to add the EDNS OPT RR even for truncated answers,
1149 as rfc6891 states:
1150 "The minimal response MUST be the DNS header, question section, and an
1151 OPT record. This MUST also occur when a truncated response (using
1152 the DNS header's TC bit) is returned."
1153 */
1154 if (addRecordToPacket(pw, makeOpt(edo.d_packetsize, 0, edo.d_Z), minTTL, maxanswersize)) {
1155 pw.commit();
1156 }
1157 }
1158
79332bff 1159 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
92011b8f 1160 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
aa7929a3 1161#ifdef HAVE_PROTOBUF
b790ef3d 1162 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
d9d3f9c1
RG
1163 pbMessage.setBytes(packet.size());
1164 pbMessage.setResponseCode(pw.getHeader()->rcode);
0a273054
RG
1165 if (appliedPolicy.d_name) {
1166 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
f3da83fe 1167 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1168 }
d9d3f9c1 1169 pbMessage.setPolicyTags(dc->d_policyTags);
58307a85 1170 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
67e31ebe 1171 pbMessage.setRequestorId(dq.requestorId);
590388d2 1172 pbMessage.setDeviceId(dq.deviceId);
02b47f43 1173 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
aa7929a3
RG
1174 }
1175#endif
ea634573 1176 if(!dc->d_tcp) {
b71b60ee 1177 struct msghdr msgh;
1178 struct iovec iov;
1179 char cbuf[256];
1180 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1181 msgh.msg_control=NULL;
1182
cbc03320 1183 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1184 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1185 }
cbc03320 1186 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1187 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
3762e821 1188 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
e9f63d47 1189 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1190 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1191 g_now.tv_sec,
76e2b9e3 1192 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1
RG
1193 min(minTTL,SyncRes::s_packetcachettl),
1194 &pbMessage);
1051f8a9 1195 }
3762e821 1196 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1197 }
9c495589
BH
1198 else {
1199 char buf[2];
ea634573
BH
1200 buf[0]=packet.size()/256;
1201 buf[1]=packet.size()%256;
feccc9fc 1202
c038218b 1203 Utility::iovec iov[2];
feccc9fc 1204
ea634573
BH
1205 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1206 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1207
dd079764 1208 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1209 bool hadError=true;
feccc9fc 1210
dd079764 1211 if(wret == 0)
18af64a8 1212 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1213 else if(wret < 0 )
18af64a8 1214 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764
RG
1215 else if((unsigned int)wret != 2 + packet.size())
1216 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1217 else
18af64a8 1218 hadError=false;
3ddb9247 1219
09e6702a 1220 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1221
09e6702a 1222 if(hadError) {
18af64a8 1223 // no need to remove us from FDM, we weren't there
c36bc97a 1224 dc->d_socket = -1;
09e6702a 1225 }
a6ae6414 1226 else {
fde296a3
RG
1227 dc->d_tcpConnection->queriesCount++;
1228 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1229 dc->d_socket = -1;
1230 }
1231 else {
1232 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1233 Utility::gettimeofday(&g_now, 0); // needs to be updated
1234 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1235 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1236 }
0e9d9ce2 1237 }
9c495589 1238 }
2c9119cd 1239 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1240 if(!g_quiet) {
461df9d2 1241 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 1242 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1243 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1244 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1245
1246 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1247 L<< ", dnssec="<<vStates[sr.getValidationState()];
1248 }
1249
1250 L<<endl;
1251
c75a6a9e 1252 }
b23b8614 1253
3ddb9247 1254 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
2c9119cd 1255
fe213470
BH
1256 if(spent < 0.001)
1257 g_stats.answers0_1++;
1258 else if(spent < 0.010)
1259 g_stats.answers1_10++;
1260 else if(spent < 0.1)
1261 g_stats.answers10_100++;
1262 else if(spent < 1.0)
1263 g_stats.answers100_1000++;
1264 else
1265 g_stats.answersSlow++;
1266
574af7ea 1267 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1268 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1269 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1270 // no worries, we do this for packet cache hits elsewhere
19178da9 1271
1272 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1273 if(ourtime < 1)
1274 g_stats.ourtime0_1++;
1275 else if(ourtime < 2)
1276 g_stats.ourtime1_2++;
1277 else if(ourtime < 4)
1278 g_stats.ourtime2_4++;
1279 else if(ourtime < 8)
1280 g_stats.ourtime4_8++;
1281 else if(ourtime < 16)
1282 g_stats.ourtime8_16++;
1283 else if(ourtime < 32)
1284 g_stats.ourtime16_32++;
1285 else {
1286 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1287 g_stats.ourtimeSlow++;
1288 }
042da1a1 1289 if(ourtime >= 0.0) {
1290 newLat=ourtime*1000; // usec
1291 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1292 }
c6d04bdc 1293 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 1294 delete dc;
c36bc97a 1295 dc=0;
288f4aa9 1296 }
3f81d239 1297 catch(PDNSException &ae) {
a903b39c 1298 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 1299 delete dc;
288f4aa9 1300 }
7b1469bb 1301 catch(MOADNSException& e) {
a903b39c 1302 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 1303 delete dc;
7b1469bb 1304 }
fdbf35ac 1305 catch(std::exception& e) {
068c7634
PD
1306 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1307
1308 // Luawrapper nests the exception from Lua, so we unnest it here
1309 try {
1310 std::rethrow_if_nested(e);
2010ac95
RG
1311 } catch(const std::exception& ne) {
1312 L<<". Extra info: "<<ne.what();
068c7634
PD
1313 } catch(...) {}
1314
1315 L<<endl;
c36bc97a 1316 delete dc;
c154c8a4 1317 }
288f4aa9 1318 catch(...) {
a903b39c 1319 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1320 }
3ddb9247 1321
ec6eacbc 1322 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1323}
1324
d187038c 1325static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1326{
2d733c0f 1327 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1328 if(processNum >= 0)
335da0ba 1329 sockname += "."+std::to_string(processNum);
677e2a46 1330 sockname+=".controlsocket";
41f7a068 1331 s_rcc.listen(sockname);
3ddb9247 1332
387de317
BH
1333 int sockowner = -1;
1334 int sockgroup = -1;
1335
1336 if (!::arg().isEmpty("socket-group"))
1337 sockgroup=::arg().asGid("socket-group");
1338 if (!::arg().isEmpty("socket-owner"))
1339 sockowner=::arg().asUid("socket-owner");
3ddb9247 1340
f838ad8d
BH
1341 if (sockgroup > -1 || sockowner > -1) {
1342 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1343 unixDie("Failed to chown control socket");
1344 }
1345 }
387de317
BH
1346
1347 // do mode change if socket-mode is given
1348 if(!::arg().isEmpty("socket-mode")) {
1349 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1350 if(chmod(sockname.c_str(), sockmode) < 0) {
1351 unixDie("Failed to chmod control socket");
1352 }
387de317 1353 }
1d5b3ce6
BH
1354}
1355
00b8cadc 1356static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options)
02b47f43 1357{
b40562da 1358 bool found = false;
02b47f43
RG
1359 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1360 size_t questionLen = question.length();
1361 unsigned int consumed=0;
1362 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1363
1364 size_t pos= sizeof(dnsheader)+consumed+4;
1365 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1366 = 11 */
1367 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1368 /* OPT root label (1) followed by type (2) */
1369 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
00b8cadc
RG
1370 if (!options) {
1371 char* ecsStart = nullptr;
1372 size_t ecsLen = 0;
1373 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1374 if (res == 0 && ecsLen > 4) {
1375 EDNSSubnetOpts eso;
1376 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1377 *ednssubnet=eso;
1378 found = true;
1379 }
1380 }
1381 }
1382 else {
1383 int res = getEDNSOptions((char*)question.c_str()+pos+9, questionLen - pos - 9, *options);
1384 if (res == 0) {
1385 const auto& it = options->find(EDNSOptionCode::ECS);
1386 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1387 EDNSSubnetOpts eso;
1388 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1389 *ednssubnet=eso;
1390 found = true;
1391 }
1392 }
02b47f43
RG
1393 }
1394 }
1395 }
1396 }
b40562da 1397 return found;
02b47f43
RG
1398}
1399
d187038c 1400static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1401{
cd989c87 1402 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1403
879b3f70 1404 if(conn->state==TCPConnection::BYTE0) {
b841314c 1405 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 1406 if(bytes==1)
667f7e60 1407 conn->state=TCPConnection::BYTE1;
3ddb9247 1408 if(bytes==2) {
a0aa4f64 1409 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
1410 conn->bytesread=0;
1411 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1412 }
1413 if(!bytes || bytes < 0) {
bb4bdbaf 1414 t_fdm->removeReadFD(fd);
09e6702a
BH
1415 return;
1416 }
1417 }
667f7e60 1418 else if(conn->state==TCPConnection::BYTE1) {
b841314c 1419 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 1420 if(bytes==1) {
667f7e60 1421 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1422 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 1423 conn->bytesread=0;
09e6702a
BH
1424 }
1425 if(!bytes || bytes < 0) {
1426 if(g_logCommonErrors)
cd989c87 1427 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 1428 t_fdm->removeReadFD(fd);
09e6702a
BH
1429 return;
1430 }
1431 }
667f7e60 1432 else if(conn->state==TCPConnection::GETQUESTION) {
b841314c 1433 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
f9d67b41 1434 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
cd989c87 1435 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 1436 t_fdm->removeReadFD(fd);
09e6702a
BH
1437 return;
1438 }
b841314c 1439 conn->bytesread+=(uint16_t)bytes;
667f7e60 1440 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1441 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1442
f26bf547 1443 DNSComboWriter* dc=nullptr;
09e6702a 1444 try {
cd989c87 1445 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
1446 }
1447 catch(MOADNSException &mde) {
3ddb9247 1448 g_stats.clientParseError++;
4957a608 1449 if(g_logCommonErrors)
cd989c87 1450 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 1451 return;
09e6702a 1452 }
cd989c87
BH
1453 dc->d_tcpConnection = conn; // carry the torch
1454 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1455 dc->d_tcp=true;
cd989c87 1456 dc->setRemote(&conn->d_remote);
a6147cd2 1457 ComboAddress dest;
1458 memset(&dest, 0, sizeof(dest));
1459 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1460 socklen_t len = dest.getSocklen();
1461 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1462 dc->setLocal(dest);
33dcceba
RG
1463 DNSName qname;
1464 uint16_t qtype=0;
1465 uint16_t qclass=0;
1466 bool needECS = false;
67e31ebe 1467 string requestorId;
590388d2 1468 string deviceId;
aa7929a3 1469#ifdef HAVE_PROTOBUF
02b47f43 1470 auto luaconfsLocal = g_luaconfs.getLocal();
33dcceba
RG
1471 if (luaconfsLocal->protobufServer) {
1472 needECS = true;
1473 }
1474#endif
1475
f26bf547 1476 if(needECS || (t_pdl && t_pdl->d_gettag)) {
33dcceba
RG
1477
1478 try {
00b8cadc 1479 std::map<uint16_t, EDNSOptionView> ednsOptions;
b40562da 1480 dc->d_ecsParsed = true;
00b8cadc 1481 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
02b47f43 1482
f26bf547 1483 if(t_pdl && t_pdl->d_gettag) {
33dcceba 1484 try {
590388d2 1485 dc->d_tag = t_pdl->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
33dcceba
RG
1486 }
1487 catch(std::exception& e) {
1488 if(g_logCommonErrors)
1489 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1490 }
1491 }
1492 }
1493 catch(std::exception& e)
1494 {
1495 if(g_logCommonErrors)
1496 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1497 }
1498 }
1499#ifdef HAVE_PROTOBUF
4898a348 1500 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
67e31ebe 1501 dc->d_requestorId = requestorId;
590388d2 1502 dc->d_deviceId = deviceId;
02b47f43 1503 dc->d_uuid = (*t_uuidGenerator)();
4898a348 1504 }
02b47f43 1505
4898a348 1506 if(luaconfsLocal->protobufServer) {
02b47f43 1507 try {
02b47f43 1508 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
02b47f43 1509
b790ef3d 1510 if (!luaconfsLocal->protobufTaggedOnly) {
590388d2 1511 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 1512 }
02b47f43
RG
1513 }
1514 catch(std::exception& e) {
1515 if(g_logCommonErrors)
1516 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1517 }
1518 }
aa7929a3 1519#endif
879b3f70 1520 if(dc->d_mdp.d_header.qr) {
4957a608 1521 delete dc;
048f5db6 1522 g_stats.ignoredCount++;
4328f463 1523 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
4957a608 1524 return;
879b3f70 1525 }
3abcdab2
PD
1526 if(dc->d_mdp.d_header.opcode) {
1527 delete dc;
048f5db6 1528 g_stats.ignoredCount++;
4328f463 1529 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
3abcdab2
PD
1530 return;
1531 }
09e6702a 1532 else {
4957a608
BH
1533 ++g_stats.qcounter;
1534 ++g_stats.tcpqcounter;
50a5ef72 1535 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1536 return;
09e6702a
BH
1537 }
1538 }
1539 }
1540}
1541
6dcd28c3 1542//! Handle new incoming TCP connection
d187038c 1543static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1544{
37d3f960 1545 ComboAddress addr;
09e6702a 1546 socklen_t addrlen=sizeof(addr);
a683e8bd 1547 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1548 if(newsock>=0) {
85c32340
BH
1549 if(MT->numProcesses() > g_maxMThreads) {
1550 g_stats.overCapacityDrops++;
a7b68ae7
RG
1551 try {
1552 closesocket(newsock);
1553 }
1554 catch(const PDNSException& e) {
1555 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1556 }
85c32340
BH
1557 return;
1558 }
1559
92011b8f 1560 if(t_remotes)
1561 t_remotes->push_back(addr);
49a699c4 1562 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1563 if(!g_quiet)
4957a608 1564 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1565
09e6702a 1566 g_stats.unauthorizedTCP++;
a7b68ae7
RG
1567 try {
1568 closesocket(newsock);
1569 }
1570 catch(const PDNSException& e) {
1571 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1572 }
09e6702a
BH
1573 return;
1574 }
bd0289fc 1575 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1576 g_stats.tcpClientOverflow++;
a7b68ae7
RG
1577 try {
1578 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1579 }
1580 catch(const PDNSException& e) {
1581 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1582 }
09e6702a
BH
1583 return;
1584 }
3ddb9247 1585
3897b9e1 1586 setNonBlocking(newsock);
f26bf547 1587 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 1588 tc->state=TCPConnection::BYTE0;
3ddb9247 1589
cd989c87 1590 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1591
0bff046b 1592 struct timeval now;
c038218b 1593 Utility::gettimeofday(&now, 0);
cd989c87 1594 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1595 }
1596}
3ddb9247 1597
/* Handles one UDP question that has already passed the listener's sanity checks.
 *
 * question  - raw query bytes as received from the wire
 * fromaddr  - client address the answer is sent back to
 * destaddr  - local address the query arrived on (used as source address of the reply on "any" sockets)
 * tv        - receive timestamp of the packet; when tv_sec is non-zero, packets older than one second are dropped
 * fd        - listening socket the query arrived on and the reply is sent from
 *
 * The function first tries to answer from the packet cache; on a miss it applies the Lua ipfilter
 * and capacity limits and then hands the query to a new MTasker thread (startDoResolve).
 * Every path returns 0.
 */
static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
{
  gettimeofday(&g_now, 0);
  struct timeval diff = g_now - tv;
  double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0); // age of the packet in milliseconds

  // tv.tv_sec == 0 means no receive timestamp is available, so the age check is skipped
  if(tv.tv_sec && delta > 1000.0) {
    g_stats.tooOldDrops++;
    return 0;
  }

  ++g_stats.qcounter;
  if(fromaddr.sin4.sin_family==AF_INET6)
    g_stats.ipv6qcounter++;

  string response;
  // NOTE(review): no length check here - this relies on the caller having verified that question holds a full dnsheader
  const struct dnsheader* dh = (struct dnsheader*)question.c_str();
  unsigned int ctag=0;
  uint32_t qhash = 0;
  bool needECS = false;
  std::vector<std::string> policyTags;
  LuaContext::LuaObject data;
  string requestorId;
  string deviceId;
#ifdef HAVE_PROTOBUF
  boost::uuids::uuid uniqueId;
  auto luaconfsLocal = g_luaconfs.getLocal();
  if (luaconfsLocal->protobufServer) {
    // incoming-query logging needs both a message id and the EDNS Client Subnet of the query
    uniqueId = (*t_uuidGenerator)();
    needECS = true;
  } else if (luaconfsLocal->outgoingProtobufServer) {
    // outgoing-only logging just needs the id
    uniqueId = (*t_uuidGenerator)();
  }
#endif
  EDNSSubnetOpts ednssubnet;
  bool ecsFound = false;
  bool ecsParsed = false;
  try {
    DNSName qname;
    uint16_t qtype=0;
    uint16_t qclass=0;
    uint32_t age;
    bool qnameParsed=false;
#ifdef MALLOC_TRACE
    /*
    static uint64_t last=0;
    if(!last)
      g_mtracer->clearAllocators();
    cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
    last=g_mtracer->getAllocs();
    cout<<g_mtracer->topAllocatorsString()<<endl;
    g_mtracer->clearAllocators();
    */
#endif

    // Only parse the qname/ECS up front when somebody needs it: protobuf logging or a Lua gettag hook
    if(needECS || (t_pdl && t_pdl->d_gettag)) {
      try {
        std::map<uint16_t, EDNSOptionView> ednsOptions;
        ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
        qnameParsed = true;
        ecsParsed = true;

        if(t_pdl && t_pdl->d_gettag) {
          try {
            ctag=t_pdl->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
          }
          catch(std::exception& e) {
            // a failing gettag hook is not fatal: the query continues with tag 0
            if(g_logCommonErrors)
              L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
          }
        }
      }
      catch(std::exception& e)
      {
        // parse failure leaves qnameParsed/ecsParsed false; the cache lookup below then works on the raw packet
        if(g_logCommonErrors)
          L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
      }
    }

    bool cacheHit = false;
    RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
#ifdef HAVE_PROTOBUF
    if(luaconfsLocal->protobufServer) {
      if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
        protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
      }
    }
#endif /* HAVE_PROTOBUF */

    // Packet cache lookup; the overload taking qname/qtype/qclass is used when they were already parsed above
    if (qnameParsed) {
      cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
    }
    else {
      cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
    }

    if (cacheHit) {
#ifdef HAVE_PROTOBUF
      if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
        // the requestor address is masked before it is logged
        Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
        const ComboAddress& requestor = requestorNM.getMaskedNetwork();
        pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
        pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
        pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
        pbMessage.setRequestorId(requestorId);
        pbMessage.setDeviceId(deviceId);
        protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
      }
#endif /* HAVE_PROTOBUF */
      if(!g_quiet)
        L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;

      g_stats.packetCacheHits++;
      SyncRes::s_queries++;
      ageDNSPacket(response, age);
      struct msghdr msgh;
      struct iovec iov;
      char cbuf[256];
      fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
      msgh.msg_control=NULL;

      // sockets in g_fromtosockets need an explicit source address on the reply
      if(g_fromtosockets.count(fd)) {
        addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
      }
      if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
        L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;

      if(response.length() >= sizeof(struct dnsheader)) {
        // copy the header out instead of casting, then account for the rcode we just sent
        struct dnsheader tmpdh;
        memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
        updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
      }
      g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
      g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
      return 0;
    }
  }
  catch(std::exception& e) {
    L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
    return 0;
  }

  // Cache miss from here on: apply policy and capacity limits before spending a resolver thread
  if(t_pdl) {
    if(t_pdl->ipfilter(fromaddr, destaddr, *dh)) {
      if(!g_quiet)
        L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
      g_stats.policyDrops++;
      return 0;
    }
  }

  if(MT->numProcesses() > g_maxMThreads) {
    if(!g_quiet)
      L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;

    g_stats.overCapacityDrops++;
    return 0;
  }

  // Package everything the resolver needs; ownership of dc passes to startDoResolve
  DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
  dc->setSocket(fd);
  dc->d_tag=ctag;
  dc->d_qhash=qhash;
  dc->d_query = question;
  dc->setRemote(&fromaddr);
  dc->setLocal(destaddr);
  dc->d_tcp=false;
  dc->d_policyTags = policyTags;
  dc->d_data = data;
  dc->d_ecsFound = ecsFound;
  dc->d_ecsParsed = ecsParsed;
  dc->d_ednssubnet = ednssubnet;
#ifdef HAVE_PROTOBUF
  if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
    dc->d_uuid = uniqueId;
  }
  dc->d_requestorId = requestorId;
  dc->d_deviceId = deviceId;
#endif

  MT->makeThread(startDoResolve, (void*) dc); // deletes dc
  return 0;
}
1781
b71b60ee 1782
d187038c 1783static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 1784{
a683e8bd 1785 ssize_t len;
5db529f8
BH
1786 char data[1500];
1787 ComboAddress fromaddr;
b71b60ee 1788 struct msghdr msgh;
1789 struct iovec iov;
1790 char cbuf[256];
390f1dab 1791 bool firstQuery = true;
b71b60ee 1792
1793 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1794 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1795
3ddb9247 1796 for(;;)
b71b60ee 1797 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab
RG
1798
1799 firstQuery = false;
1800
92011b8f 1801 if(t_remotes)
1802 t_remotes->push_back(fromaddr);
b23b8614 1803
49a699c4 1804 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 1805 if(!g_quiet)
4957a608 1806 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1807
5db529f8 1808 g_stats.unauthorizedUDP++;
a9af3782 1809 return;
5db529f8 1810 }
15c01deb 1811 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 1812 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 1813 if(!g_quiet)
81859ba5 1814 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1815
1816 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1817 return;
1818 }
5db529f8 1819 try {
b23b8614 1820 dnsheader* dh=(dnsheader*)data;
3ddb9247 1821
b23b8614 1822 if(dh->qr) {
048f5db6 1823 g_stats.ignoredCount++;
4957a608
BH
1824 if(g_logCommonErrors)
1825 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 1826 }
3abcdab2 1827 else if(dh->opcode) {
048f5db6 1828 g_stats.ignoredCount++;
3abcdab2
PD
1829 if(g_logCommonErrors)
1830 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1831 }
5db529f8 1832 else {
a683e8bd 1833 string question(data, (size_t)len);
b71b60ee 1834 struct timeval tv={0,0};
1835 HarvestTimestamp(&msgh, &tv);
1836 ComboAddress dest;
c3cecd36 1837 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
a6147cd2 1838 auto loc = rplookup(g_listenSocketsAddresses, fd);
1839 if(HarvestDestinationAddress(&msgh, &dest)) {
1840 // but.. need to get port too
1841 if(loc)
1842 dest.sin4.sin_port = loc->sin4.sin_port;
1843 }
1844 else {
1845 if(loc) {
1846 dest = *loc;
1847 }
1848 else {
1849 dest.sin4.sin_family = fromaddr.sin4.sin_family;
a683e8bd
RG
1850 socklen_t slen = dest.getSocklen();
1851 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
a6147cd2 1852 }
1853 }
232f0877 1854 if(g_weDistributeQueries)
b71b60ee 1855 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
232f0877 1856 else
b71b60ee 1857 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
5db529f8
BH
1858 }
1859 }
1860 catch(MOADNSException& mde) {
3ddb9247 1861 g_stats.clientParseError++;
84e66a59 1862 if(g_logCommonErrors)
4957a608 1863 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 1864 }
0b602819
KM
1865 catch(std::runtime_error& e) {
1866 g_stats.clientParseError++;
1867 if(g_logCommonErrors)
1868 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
1869 }
1870 }
ac0e821b
BH
1871 else {
1872 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
390f1dab 1873 if(firstQuery && errno == EAGAIN)
9326cae1 1874 g_stats.noPacketError++;
390f1dab 1875
bf3b0cec 1876 break;
ac0e821b 1877 }
5db529f8
BH
1878}
1879
810ff705 1880static void makeTCPServerSockets(unsigned int threadId)
9c495589 1881{
37d3f960 1882 int fd;
f28307ad 1883 vector<string>locals;
2e3d8a19 1884 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1885
f28307ad 1886 if(locals.empty())
3f81d239 1887 throw PDNSException("No local address specified");
3ddb9247 1888
f28307ad 1889 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1890 ServiceTuple st;
1891 st.port=::arg().asNum("local-port");
1892 parseService(*i, st);
3ddb9247 1893
32252594
BH
1894 ComboAddress sin;
1895
f28307ad 1896 memset((char *)&sin,0, sizeof(sin));
37d3f960 1897 sin.sin4.sin_family = AF_INET;
32252594 1898 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1899 sin.sin6.sin6_family = AF_INET6;
f71bc087 1900 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1901 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1902 }
1903
1904 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 1905 if(fd<0)
3f81d239 1906 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 1907
3897b9e1 1908 setCloseOnExec(fd);
a903b39c 1909
f28307ad 1910 int tmp=1;
810ff705 1911 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
f28307ad 1912 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1913 exit(1);
f28307ad 1914 }
0dfa94ab 1915 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1916 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1917 }
1918
c8ddb7c2 1919#ifdef TCP_DEFER_ACCEPT
810ff705 1920 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 1921 if(i==locals.begin())
4957a608 1922 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1923 }
1924#endif
1925
fec7dd5a
SS
1926 if( ::arg().mustDo("non-local-bind") )
1927 Utility::setBindAny(AF_INET, fd);
1928
2332f42d 1929#ifdef SO_REUSEPORT
810ff705
RG
1930 if(g_reusePort) {
1931 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 1932 throw PDNSException("SO_REUSEPORT: "+stringerror());
1933 }
1934#endif
1935
0735b17e
RG
1936 if (::arg().asNum("tcp-fast-open") > 0) {
1937#ifdef TCP_FASTOPEN
1938 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1939 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1940 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1941 }
1942#else
1943 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1944#endif
1945 }
1946
32252594 1947 sin.sin4.sin_port = htons(st.port);
a683e8bd 1948 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1949 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1950 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 1951
3897b9e1 1952 setNonBlocking(fd);
49a699c4 1953 setSocketSendBuffer(fd, 65000);
37d3f960 1954 listen(fd, 128);
810ff705 1955 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1956 g_tcpListenSockets.push_back(fd);
84433b79 1957 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1958 // - fd is not that which we know here, but returned from accept()
3ddb9247 1959 if(sin.sin4.sin_family == AF_INET)
32252594 1960 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1961 else
32252594 1962 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1963 }
9c495589
BH
1964}
1965
810ff705 1966static void makeUDPServerSockets(unsigned int threadId)
288f4aa9 1967{
fec7dd5a 1968 int one=1;
f28307ad 1969 vector<string>locals;
2e3d8a19 1970 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1971
f28307ad 1972 if(locals.empty())
3f81d239 1973 throw PDNSException("No local address specified");
3ddb9247 1974
f28307ad 1975 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1976 ServiceTuple st;
1977 st.port=::arg().asNum("local-port");
1978 parseService(*i, st);
1979
37d3f960 1980 ComboAddress sin;
996c89cc 1981
37d3f960
BH
1982 memset(&sin, 0, sizeof(sin));
1983 sin.sin4.sin_family = AF_INET;
32252594 1984 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1985 sin.sin6.sin6_family = AF_INET6;
f71bc087 1986 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1987 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 1988 }
3ddb9247 1989
bb4bdbaf 1990 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 1991 if(fd < 0) {
3f81d239 1992 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1993 }
915b0c39
AT
1994 if (!setSocketTimestamps(fd))
1995 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 1996
b71b60ee 1997 if(IsAnyAddress(sin)) {
cbc03320 1998 if(sin.sin4.sin_family == AF_INET)
1999 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2000 g_fromtosockets.insert(fd);
757d3179 2001#ifdef IPV6_RECVPKTINFO
cbc03320 2002 if(sin.sin4.sin_family == AF_INET6)
2003 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2004 g_fromtosockets.insert(fd);
757d3179 2005#endif
0dfa94ab 2006 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2007 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2008 }
b71b60ee 2009 }
fec7dd5a
SS
2010 if( ::arg().mustDo("non-local-bind") )
2011 Utility::setBindAny(AF_INET6, fd);
2012
3897b9e1 2013 setCloseOnExec(fd);
a903b39c 2014
4e9a20e6 2015 setSocketReceiveBuffer(fd, 250000);
32252594 2016 sin.sin4.sin_port = htons(st.port);
37d3f960 2017
2332f42d 2018
2573d4a6 2019#ifdef SO_REUSEPORT
810ff705 2020 if(g_reusePort) {
2332f42d 2021 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2022 throw PDNSException("SO_REUSEPORT: "+stringerror());
2023 }
2024#endif
a683e8bd 2025 socklen_t socklen=sin.getSocklen();
3ddb9247 2026 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2027 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2028
3897b9e1 2029 setNonBlocking(fd);
c2136bf0 2030
810ff705 2031 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2032 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2033 if(sin.sin4.sin_family == AF_INET)
32252594 2034 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2035 else
32252594 2036 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2037 }
c836dc19 2038}
caa6eefa 2039
d187038c 2040static void daemonize(void)
c836dc19
BH
2041{
2042 if(fork())
2043 exit(0); // bye bye
3ddb9247
PD
2044
2045 setsid();
c836dc19 2046
27a5ead5 2047 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2048 if(i < 0)
27a5ead5
BH
2049 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2050 else {
2051 dup2(i,0); /* stdin */
2052 dup2(i,1); /* stderr */
2053 dup2(i,2); /* stderr */
2054 close(i);
2055 }
288f4aa9 2056}
caa6eefa 2057
// Signal handler: only raises the statsWanted flag; doStats() clears it again after it has run.
static void usr1Handler(int)
{
  statsWanted=true;
}
ae1b2e98 2062
// Signal handler: toggles g_quiet and brings the SyncRes default log mode and the "quiet" setting in line with it.
// NOTE(review): this does considerably more than set a flag; whether setDefaultLogMode() and arg().set()
// are safe to call from signal context cannot be told from here - confirm.
static void usr2Handler(int)
{
  g_quiet= !g_quiet;
  SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
  ::arg().set("quiet")=g_quiet ? "" : "no";
}
2069
d187038c 2070static void doStats(void)
c75a6a9e 2071{
16beeaa4
BH
2072 static time_t lastOutputTime;
2073 static uint64_t lastQueryCount;
d299d4f5 2074
2075 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2076 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2077
d299d4f5 2078 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
bd301954 2079 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2080 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2081 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2082 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2083
bd301954 2084 L<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2085 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2086 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
bd301954
JB
2087 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2088 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2089 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
bd301954 2090 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2091 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2092
bd301954 2093 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2094 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2095
bd301954 2096 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2097 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2098
16beeaa4
BH
2099 time_t now = time(0);
2100 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
bd301954 2101 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2102 }
2103 lastOutputTime = now;
2104 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2105 }
3ddb9247 2106 else if(statsWanted)
bd301954 2107 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2108
c75a6a9e
BH
2109 statsWanted=false;
2110}
c836dc19 2111
29f0b1ce 2112static void houseKeeping(void *)
c836dc19 2113{
3337c2f7
RG
2114 static thread_local time_t last_stat, last_rootupdate, last_prune, last_secpoll;
2115 static thread_local int cleanCounter=0;
2116 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
cc59bce6 2117 try {
2118 if(s_running)
2119 return;
2120 s_running=true;
3ddb9247 2121
cc59bce6 2122 struct timeval now;
2123 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2124
2125 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 2126 DTime dt;
2127 dt.setTimeval(now);
a6f7f5fe 2128 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2129 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2130
a6f7f5fe 2131 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2132
cc59bce6 2133 if(!((cleanCounter++)%40)) { // this is a full scan!
2134 time_t limit=now.tv_sec-300;
a712cb56 2135 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2136 }
2137 last_prune=time(0);
d67620e4 2138 }
3ddb9247 2139
cc59bce6 2140 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2141 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2142 if (!res)
2143 last_rootupdate=now.tv_sec;
cc59bce6 2144 }
3ddb9247 2145
cc59bce6 2146 if(!t_id) {
0ec489bf 2147 if(g_statisticsInterval > 0 && now.tv_sec - last_stat >= g_statisticsInterval) {
cc59bce6 2148 doStats();
2149 last_stat=time(0);
2150 }
3ddb9247 2151
cc59bce6 2152 if(now.tv_sec - last_secpoll >= 3600) {
2153 try {
2154 doSecPoll(&last_secpoll);
2155 }
581d4ea3 2156 catch(std::exception& e)
2157 {
2158 L<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2159 }
47e9b74f 2160 catch(PDNSException& e)
2161 {
2162 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2163 }
d0992a65
CH
2164 catch(ImmediateServFailException &e)
2165 {
2166 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2167 }
47e9b74f 2168 catch(...)
2169 {
2170 L<<Logger::Error<<"Exception while performing security poll"<<endl;
2171 }
2172
18b73338 2173 }
d67620e4 2174 }
cc59bce6 2175 s_running=false;
d67620e4 2176 }
cc59bce6 2177 catch(PDNSException& ae)
2178 {
2179 s_running=false;
2180 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2181 throw;
2182 }
779828c4 2183}
d6d5dea7 2184
d187038c 2185static void makeThreadPipes()
49a699c4 2186{
c3828c03 2187 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
2188 struct ThreadPipeSet tps;
2189 int fd[2];
2190 if(pipe(fd) < 0)
2191 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2192
49a699c4
BH
2193 tps.readToThread = fd[0];
2194 tps.writeToThread = fd[1];
3ddb9247 2195
49a699c4
BH
2196 if(pipe(fd) < 0)
2197 unixDie("Creating pipe for inter-thread communications");
2198 tps.readFromThread = fd[0];
2199 tps.writeFromThread = fd[1];
3ddb9247 2200
49a699c4
BH
2201 g_pipes.push_back(tps);
2202 }
2203}
2204
00c9b8c1
BH
// Message passed between threads: a pointer to a heap-allocated ThreadMSG is written to a thread's pipe,
// and the reader (handlePipeRequest) executes func and deletes the message.
struct ThreadMSG
{
  pipefunc_t func;   // work to execute in the receiving thread
  bool wantAnswer;   // when true, the receiver writes the result pointer of func back on its answer pipe
};
2210
49a699c4
BH
2211void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2212{
49a699c4 2213 unsigned int n = 0;
1dc8f4d0 2214 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
2215 {
2216 if(n++ == t_id) {
2217 if(!skipSelf)
2218 func(); // don't write to ourselves!
2219 continue;
2220 }
3ddb9247 2221
00c9b8c1
BH
2222 ThreadMSG* tmsg = new ThreadMSG();
2223 tmsg->func = func;
2224 tmsg->wantAnswer = true;
b841314c
RG
2225 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2226 delete tmsg;
49a699c4 2227 unixDie("write to thread pipe returned wrong size or error");
b841314c 2228 }
3ddb9247 2229
49a699c4
BH
2230 string* resp;
2231 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2232 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2233
49a699c4
BH
2234 if(resp) {
2235// cerr <<"got response: " << *resp << endl;
2236 delete resp;
2237 }
2238 }
2239}
06ea9015 2240
8171ab83 2241void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 2242{
8171ab83 2243 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 2244 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2245
00c9b8c1
BH
2246 if(target == t_id) {
2247 func();
2248 return;
2249 }
3ddb9247 2250 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
2251 ThreadMSG* tmsg = new ThreadMSG();
2252 tmsg->func = func;
2253 tmsg->wantAnswer = false;
3ddb9247 2254
b841314c
RG
2255 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2256 delete tmsg;
3ddb9247 2257 unixDie("write to thread pipe returned wrong size or error");
b841314c 2258 }
00c9b8c1 2259}
3427fa8a 2260
d187038c 2261static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2262{
f26bf547 2263 ThreadMSG* tmsg = nullptr;
3ddb9247
PD
2264
2265 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
2266 unixDie("read from thread pipe returned wrong size or error");
2267 }
3ddb9247 2268
2f22827a 2269 void *resp=0;
2270 try {
2271 resp = tmsg->func();
2272 }
2273 catch(std::exception& e) {
6d2010a8 2274 if(g_logCommonErrors)
2275 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2276 }
2277 catch(PDNSException& e) {
6d2010a8 2278 if(g_logCommonErrors)
2279 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2280 }
d7c676a5
RG
2281 if(tmsg->wantAnswer) {
2282 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2283 delete tmsg;
00c9b8c1 2284 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
2285 }
2286 }
3ddb9247 2287
00c9b8c1 2288 delete tmsg;
49a699c4 2289}
09e6702a 2290
13034931
BH
// Adapter that calls func and returns its T* result as an untyped void*.
// broadcastAccFunction binds it to store a typed function in ThreadMSG::func.
template<class T> void *voider(const boost::function<T*()>& func)
{
  return func();
}
2295
b3b5459d
BH
2296vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2297{
2298 a.insert(a.end(), b.begin(), b.end());
2299 return a;
2300}
2301
// Appends all (name, type) pairs of b to a and returns a.
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  const auto first = b.begin();
  const auto last = b.end();
  a.insert(a.end(), first, last);
  return a;
}
2307
3ddb9247
PD
2308vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2309{
2310 a.insert(a.end(), b.begin(), b.end());
2311 return a;
2312}
2313
92011b8f 2314
13034931 2315template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
2316{
2317 unsigned int n = 0;
2318 T ret=T();
1dc8f4d0 2319 for(ThreadPipeSet& tps : g_pipes)
3427fa8a
BH
2320 {
2321 if(n++ == t_id) {
2322 if(!skipSelf) {
2323 T* resp = (T*)func(); // don't write to ourselves!
2324 if(resp) {
2325 //~ cerr <<"got direct: " << *resp << endl;
2326 ret += *resp;
2327 delete resp;
2328 }
2329 }
2330 continue;
2331 }
3ddb9247 2332
00c9b8c1
BH
2333 ThreadMSG* tmsg = new ThreadMSG();
2334 tmsg->func = boost::bind(voider<T>, func);
2335 tmsg->wantAnswer = true;
3ddb9247 2336
b841314c
RG
2337 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2338 delete tmsg;
3427fa8a 2339 unixDie("write to thread pipe returned wrong size or error");
b841314c 2340 }
3ddb9247 2341
3427fa8a
BH
2342 T* resp;
2343 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2344 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2345
3427fa8a
BH
2346 if(resp) {
2347 //~ cerr <<"got response: " << *resp << endl;
2348 ret += *resp;
2349 delete resp;
2350 }
2351 }
2352 return ret;
2353}
2354
13034931
BH
// The template is defined in this file only, so every result type used with it needs an instantiation here.
template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 2359
d187038c 2360static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2361{
2362 string remote;
2363 string msg=s_rcc.recv(&remote);
2364 RecursorControlParser rcp;
2365 RecursorControlParser::func_t* command;
3ddb9247 2366
09e6702a 2367 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2368
2369 // If we are inside a chroot, we need to strip
2370 if (!arg()["chroot"].empty()) {
a683e8bd 2371 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2372 remote = remote.substr(len);
2373 }
2374
ab5c053d
BH
2375 try {
2376 s_rcc.send(answer, &remote);
2377 command();
2378 }
fdbf35ac 2379 catch(std::exception& e) {
ab5c053d
BH
2380 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2381 }
3f81d239 2382 catch(PDNSException& ae) {
ab5c053d
BH
2383 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2384 }
09e6702a
BH
2385}
2386
d187038c 2387static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2388{
0b18b22e 2389 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2390 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2391
667f7e60 2392 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2393
a683e8bd 2394 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2395 if(ret > 0) {
667f7e60 2396 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2397 pident->inNeeded-=(size_t)ret;
825fa717 2398 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2399 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2400 PacketID pid=*pident;
2401 string msg=pident->inMSG;
3ddb9247 2402
bb4bdbaf 2403 t_fdm->removeReadFD(fd);
3ddb9247 2404 MT->sendEvent(pid, &msg);
09e6702a
BH
2405 }
2406 else {
667f7e60 2407 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2408 }
2409 }
2410 else {
667f7e60 2411 PacketID tmp=*pident;
bb4bdbaf 2412 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2413 string empty;
2414 MT->sendEvent(tmp, &empty); // this conveys error status
2415 }
2416}
2417
d187038c 2418static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2419{
0b18b22e 2420 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 2421 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 2422 if(ret > 0) {
a683e8bd 2423 pid->outPos+=(ssize_t)ret;
667f7e60
BH
2424 if(pid->outPos==pid->outMSG.size()) {
2425 PacketID tmp=*pid;
bb4bdbaf 2426 t_fdm->removeWriteFD(fd);
09e6702a
BH
2427 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2428 }
2429 }
2430 else { // error or EOF
667f7e60 2431 PacketID tmp(*pid);
bb4bdbaf 2432 t_fdm->removeWriteFD(fd);
09e6702a 2433 string sent;
998a4334 2434 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
2435 }
2436}
2437
34801ab1 2438// resend event to everybody chained onto it
d187038c 2439static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
2440{
2441 if(iter->key.chain.empty())
2442 return;
e27e91a8 2443 // cerr<<"doResends called!\n";
34801ab1
BH
2444 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2445 resend.fd=-1;
2446 resend.id=*i;
e27e91a8 2447 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 2448
34801ab1
BH
2449 MT->sendEvent(resend, &content);
2450 g_stats.chainResends++;
34801ab1
BH
2451 }
2452}
2453
d187038c 2454static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2455{
600fc20b 2456 PacketID pid=any_cast<PacketID>(var);
a683e8bd 2457 ssize_t len;
e45beeda 2458 char data[g_outgoingEDNSBufsize];
996c89cc 2459 ComboAddress fromaddr;
09e6702a
BH
2460 socklen_t addrlen=sizeof(fromaddr);
2461
998a4334 2462 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 2463
a683e8bd 2464 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 2465 if(len < 0)
996c89cc 2466 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 2467 else {
3ddb9247 2468 g_stats.serverParseError++;
09e6702a 2469 if(g_logCommonErrors)
85db02c5 2470 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 2471 ": packet smaller than DNS header"<<endl;
998a4334 2472 }
34801ab1 2473
49a699c4 2474 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
2475 string empty;
2476
2477 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 2478 if(iter != MT->d_waiters.end())
34801ab1 2479 doResends(iter, pid, empty);
3ddb9247 2480
34801ab1 2481 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 2482 return;
3ddb9247 2483 }
998a4334
BH
2484
2485 dnsheader dh;
2486 memcpy(&dh, data, sizeof(dh));
3ddb9247 2487
6da3b3ad
PD
2488 PacketID pident;
2489 pident.remote=fromaddr;
2490 pident.id=dh.id;
2491 pident.fd=fd;
34801ab1 2492
33a928af 2493 if(!dh.qr && g_logCommonErrors) {
854d44e3 2494 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
2495 }
2496
2497 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2498 !dh.qr) { // one weird server
2499 pident.domain.clear();
2500 pident.type = 0;
2501 }
2502 else {
2503 try {
0b31e67e 2504 if(len > 12)
2505 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
2506 }
2507 catch(std::exception& e) {
2508 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
0b31e67e 2509 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 2510 return;
34801ab1 2511 }
6da3b3ad
PD
2512 }
2513 string packet;
2514 packet.assign(data, len);
34801ab1 2515
6da3b3ad
PD
2516 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2517 if(iter != MT->d_waiters.end()) {
2518 doResends(iter, pident, packet);
2519 }
c1da7976 2520
6da3b3ad 2521retryWithName:
4957a608 2522
6da3b3ad
PD
2523 if(!MT->sendEvent(pident, &packet)) {
2524 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2525 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2526 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 2527 pident.domain == mthread->key.domain) {
6da3b3ad 2528 mthread->key.nearMisses++;
998a4334 2529 }
6da3b3ad
PD
2530
2531 // be a bit paranoid here since we're weakening our matching
3ddb9247 2532 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
2533 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2534 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2535 pident.domain = mthread->key.domain;
2536 pident.type = mthread->key.type;
2537 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 2538 }
09e6702a 2539 }
6da3b3ad
PD
2540 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2541 if(g_logCommonErrors) {
8a464ee3 2542 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 2543 }
09e6702a 2544 }
6da3b3ad
PD
2545 else if(fd >= 0) {
2546 t_udpclientsocks->returnSocket(fd);
2547 }
09e6702a
BH
2548}
2549
1f4abb20
BH
2550FDMultiplexer* getMultiplexer()
2551{
2552 FDMultiplexer* ret;
f26bf547 2553 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 2554 try {
f26bf547 2555 ret=i.second();
1f4abb20
BH
2556 return ret;
2557 }
98d0ee4a 2558 catch(FDMultiplexerException &fe) {
0a7f24cb 2559 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
2560 }
2561 catch(...) {
2562 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2563 }
1f4abb20
BH
2564 }
2565 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2566 exit(1);
2567}
2568
3ddb9247 2569
d187038c 2570static string* doReloadLuaScript()
4485aa35 2571{
674cf0f6 2572 string fname= ::arg()["lua-dns-script"];
4485aa35 2573 try {
674cf0f6 2574 if(fname.empty()) {
f26bf547 2575 t_pdl.reset();
674cf0f6 2576 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 2577 return new string("unloaded\n");
4485aa35
BH
2578 }
2579 else {
9694e14f
AT
2580 t_pdl = std::make_shared<RecursorLua4>();
2581 t_pdl->loadFile(fname);
4485aa35
BH
2582 }
2583 }
fdbf35ac 2584 catch(std::exception& e) {
674cf0f6 2585 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 2586 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 2587 }
3ddb9247 2588
674cf0f6 2589 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 2590 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
2591}
2592
49a699c4
BH
2593string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2594{
3ddb9247 2595 if(begin != end)
49a699c4 2596 ::arg().set("lua-dns-script") = *begin;
3ddb9247 2597
0f39c1a3 2598 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 2599}
49a699c4 2600
d187038c 2601static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
2602try
2603{
2604 if(newRegex.empty()) {
f26bf547 2605 t_traceRegex.reset();
77499b05
BH
2606 return new string("unset\n");
2607 }
2608 else {
f26bf547 2609 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
2610 return new string("ok\n");
2611 }
2612}
3f81d239 2613catch(PDNSException& ae)
77499b05
BH
2614{
2615 return new string(ae.reason+"\n");
2616}
2617
2618string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2619{
2620 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2621}
2622
4e9a20e6 2623static void checkLinuxIPv6Limits()
2624{
2625#ifdef __linux__
2626 string line;
2627 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 2628 int lim=std::stoi(line);
4e9a20e6 2629 if(lim < 16384) {
36849ff2 2630 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 2631 }
2632 }
2633#endif
2634}
36849ff2 2635static void checkOrFixFDS()
4e9a20e6 2636{
c0063e60 2637 unsigned int availFDs=getFilenumLimit();
2638 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2639
2640 if(wantFDs > availFDs) {
067ad20e 2641 unsigned int hardlimit= getFilenumLimit(true);
2642 if(hardlimit >= wantFDs) {
c0063e60 2643 setFilenumLimit(wantFDs);
2644 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 2645 }
2646 else {
067ad20e 2647 int newval = (hardlimit - 25) / g_numWorkerThreads;
2648 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 2649 g_maxMThreads = newval;
067ad20e 2650 setFilenumLimit(hardlimit);
36849ff2 2651 }
2652 }
4e9a20e6 2653}
77499b05 2654
d187038c 2655static void* recursorThread(void*);
51e2144e 2656
f26bf547 2657static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
2658{
2659 t_allowFrom = ng;
f26bf547 2660 return nullptr;
49a699c4
BH
2661}
2662
dbd23fc2
BH
2663int g_argc;
2664char** g_argv;
2665
18af64a8 2666void parseACLs()
f7c1d4e3 2667{
18af64a8 2668 static bool l_initialized;
3ddb9247 2669
49a699c4 2670 if(l_initialized) { // only reload configuration file on second call
18af64a8 2671 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
2672 if(::arg()["config-name"]!="") {
2673 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2674 }
18af64a8 2675 cleanSlashes(configname);
3ddb9247
PD
2676
2677 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 2678 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 2679 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 2680 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
2681 ::arg().preParse(g_argc, g_argv, "include-dir");
2682
2683 // then process includes
2684 std::vector<std::string> extraConfigs;
242b90e1
AT
2685 ::arg().gatherIncludes(extraConfigs);
2686
1dc8f4d0 2687 for(const std::string& fn : extraConfigs) {
7e818521 2688 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2689 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2690 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2691 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2692 }
ca2c884c
AT
2693
2694 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2695 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2696 }
49a699c4 2697
f26bf547
RG
2698 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2699 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 2700
2c95fc65
BH
2701 if(!::arg()["allow-from-file"].empty()) {
2702 string line;
2c95fc65
BH
2703 ifstream ifs(::arg()["allow-from-file"].c_str());
2704 if(!ifs) {
9c61b9d0 2705 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2706 }
2707
2708 string::size_type pos;
2709 while(getline(ifs,line)) {
2710 pos=line.find('#');
2711 if(pos!=string::npos)
2712 line.resize(pos);
2713 trim(line);
2714 if(line.empty())
2715 continue;
2716
18af64a8 2717 allowFrom->addMask(line);
2c95fc65 2718 }
49a699c4 2719 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2720 }
2721 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2722 vector<string> ips;
2723 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2724
f7c1d4e3
BH
2725 L<<Logger::Warning<<"Only allowing queries from: ";
2726 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2727 allowFrom->addMask(*i);
f7c1d4e3 2728 if(i!=ips.begin())
674cf0f6 2729 L<<Logger::Warning<<", ";
f7c1d4e3
BH
2730 L<<Logger::Warning<<*i;
2731 }
2732 L<<Logger::Warning<<endl;
2733 }
49a699c4 2734 else {
3ddb9247 2735 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
49a699c4 2736 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 2737 allowFrom = nullptr;
49a699c4 2738 }
3ddb9247 2739
49a699c4 2740 g_initialAllowFrom = allowFrom;
d7dae798 2741 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 2742 oldAllowFrom = nullptr;
3ddb9247 2743
49a699c4 2744 l_initialized = true;
18af64a8
BH
2745}
2746
795215f2 2747
756e82cf 2748static void setupDelegationOnly()
2749{
2750 vector<string> parts;
2751 stringtok(parts, ::arg()["delegation-only"], ", \t");
2752 for(const auto& p : parts) {
9065eb05 2753 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 2754 }
2755}
795215f2 2756
8fd25133
RG
2757static std::map<unsigned int, std::set<int> > parseCPUMap()
2758{
2759 std::map<unsigned int, std::set<int> > result;
2760
2761 const std::string value = ::arg()["cpu-map"];
2762
2763 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
2764 L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
2765 return result;
2766 }
2767
2768 std::vector<std::string> parts;
2769
2770 stringtok(parts, value, " \t");
2771
2772 for(const auto& part : parts) {
2773 if (part.find('=') == string::npos)
2774 continue;
2775
2776 try {
2777 auto headers = splitField(part, '=');
2778 trim(headers.first);
2779 trim(headers.second);
2780
2781 unsigned int threadId = pdns_stou(headers.first);
2782 std::vector<std::string> cpus;
2783
2784 stringtok(cpus, headers.second, ",");
2785
2786 for(const auto& cpu : cpus) {
2787 int cpuId = std::stoi(cpu);
2788
2789 result[threadId].insert(cpuId);
2790 }
2791 }
2792 catch(const std::exception& e) {
2793 L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
2794 }
2795 }
2796
2797 return result;
2798}
2799
2800static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
2801{
2802 const auto& cpuMapping = cpusMap.find(n);
2803 if (cpuMapping != cpusMap.cend()) {
2804 int rc = mapThreadToCPUList(tid, cpuMapping->second);
2805 if (rc == 0) {
2806 L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
2807 for (const auto cpu : cpuMapping->second) {
2808 L<<Logger::Info<<" "<<cpu;
2809 }
2810 L<<Logger::Info<<endl;
2811 }
2812 else {
2813 L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
2814 for (const auto cpu : cpuMapping->second) {
2815 L<<Logger::Info<<" "<<cpu;
2816 }
2817 L<<Logger::Info<<strerror(rc)<<endl;
2818 }
2819 }
2820}
2821
d187038c 2822static int serviceMain(int argc, char*argv[])
18af64a8 2823{
5124de27 2824 L.setName(s_programname);
b6cfa948 2825 L.disableSyslog(::arg().mustDo("disable-syslog"));
b18fa400 2826 L.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
2827
2828 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
2829 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2830 if(val >= 0)
2831 theL().setFacility(val);
18af64a8
BH
2832 else
2833 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2834 }
2835
ba1a571d 2836 showProductVersion();
18af64a8 2837 seedRandom(::arg()["entropy-source"]);
3afde9b2 2838
06ea9015 2839 g_disthashseed=dns_random(0xffffffff);
2840
b7ef5828
PL
2841 checkLinuxIPv6Limits();
2842 try {
2843 vector<string> addrs;
2844 if(!::arg()["query-local-address6"].empty()) {
2845 SyncRes::s_doIPv6=true;
2846 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2847
2848 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2849 for(const string& addr : addrs) {
2850 g_localQueryAddresses6.push_back(ComboAddress(addr));
2851 }
2852 }
2853 else {
2854 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2855 }
2856 addrs.clear();
2857 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2858 for(const string& addr : addrs) {
2859 g_localQueryAddresses4.push_back(ComboAddress(addr));
2860 }
2861 }
2862 catch(std::exception& e) {
2863 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2864 exit(99);
2865 }
2866
e48c6b8a
PL
2867 // keep this ABOVE loadRecursorLuaConfig!
2868 if(::arg()["dnssec"]=="off")
2869 g_dnssecmode=DNSSECMode::Off;
2870 else if(::arg()["dnssec"]=="process-no-validate")
2871 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2872 else if(::arg()["dnssec"]=="process")
2873 g_dnssecmode=DNSSECMode::Process;
2874 else if(::arg()["dnssec"]=="validate")
2875 g_dnssecmode=DNSSECMode::ValidateAll;
2876 else if(::arg()["dnssec"]=="log-fail")
2877 g_dnssecmode=DNSSECMode::ValidateForLog;
2878 else {
2879 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2880 exit(1);
2881 }
2882
2883 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 2884 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 2885
a6f7f5fe 2886 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
2887 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
2888
0f5785a6
PL
2889 try {
2890 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2891 }
2892 catch (PDNSException &e) {
2893 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2894 exit(1);
2895 }
ad42489c 2896
18af64a8 2897 parseACLs();
92011b8f 2898 sortPublicSuffixList();
2899
eb5bae86 2900 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
2901 vector<string> ips;
2902 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
2903 ips.push_back("0.0.0.0");
2904 ips.push_back("::");
c36bc97a 2905
eb5bae86
BH
2906 L<<Logger::Warning<<"Will not send queries to: ";
2907 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 2908 SyncRes::addDontQuery(*i);
eb5bae86 2909 if(i!=ips.begin())
4957a608 2910 L<<Logger::Warning<<", ";
eb5bae86
BH
2911 L<<Logger::Warning<<*i;
2912 }
2913 L<<Logger::Warning<<endl;
2914 }
2915
f7c1d4e3 2916 g_quiet=::arg().mustDo("quiet");
3ddb9247 2917
1bc3c142
BH
2918 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2919 if(g_weDistributeQueries) {
2920 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2921 }
3ddb9247 2922
756e82cf 2923 setupDelegationOnly();
b33c2462 2924 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 2925
77499b05
BH
2926 if(::arg()["trace"]=="fail") {
2927 SyncRes::setDefaultLogMode(SyncRes::Store);
2928 }
2929 else if(::arg().mustDo("trace")) {
2930 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
2931 ::arg().set("quiet")="no";
2932 g_quiet=false;
3e9c6c0a 2933 g_dnssecLOG=true;
f7c1d4e3 2934 }
3ddb9247 2935
aadceba8 2936 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2937
1051f8a9
BH
2938 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2939
f7c1d4e3 2940 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
63637fd8 2941 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 2942 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
2943 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2944 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2945 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
2946 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2947 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 2948 SyncRes::s_serverID=::arg()["server-id"];
173d790e 2949 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 2950 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 2951 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 2952 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
2953 if(SyncRes::s_serverID.empty()) {
2954 char tmp[128];
2955 gethostname(tmp, sizeof(tmp)-1);
2956 SyncRes::s_serverID=tmp;
2957 }
3ddb9247 2958
e9f9b8ec
RG
2959 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
2960 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
2961
8a3a3822
RG
2962 if (!::arg().isEmpty("ecs-scope-zero-address")) {
2963 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
2964 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
2965 }
2966 else {
2967 bool found = false;
2968 for (const auto& addr : g_localQueryAddresses4) {
2969 if (!IsAnyAddress(addr)) {
2970 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
2971 found = true;
2972 break;
2973 }
2974 }
2975 if (!found) {
2976 for (const auto& addr : g_localQueryAddresses6) {
2977 if (!IsAnyAddress(addr)) {
2978 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
2979 found = true;
2980 break;
2981 }
2982 }
2983 if (!found) {
2984 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
2985 }
2986 }
2987 }
2988
2fe3354d
CH
2989 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
2990 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
2991 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
2992
5b0ddd18 2993 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 2994
49a699c4 2995 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 2996
08f3f638 2997 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 2998
f7c1d4e3 2999 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3000 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3001
3002 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3003 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3004
b3adda56
PD
3005 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3006
810ff705 3007 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b
RG
3008 if (g_numWorkerThreads < 1) {
3009 L<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3010 g_numWorkerThreads = 1;
3011 }
3012
810ff705
RG
3013 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
3014 g_maxMThreads = ::arg().asNum("max-mthreads");
3015
00b8cadc
RG
3016 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3017
0ec489bf 3018 g_statisticsInterval = ::arg().asNum("statistics-interval");
3019
810ff705
RG
3020#ifdef SO_REUSEPORT
3021 g_reusePort = ::arg().mustDo("reuseport");
3022#endif
3023
3024 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
3025
3026 if (g_useOneSocketPerThread) {
3027 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
3028 makeUDPServerSockets(threadId);
3029 makeTCPServerSockets(threadId);
3030 }
3031 }
3032 else {
3033 makeUDPServerSockets(0);
3034 makeTCPServerSockets(0);
3035 }
815099b2 3036
677e2a46
BH
3037 int forks;
3038 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3039 if(!fork()) // we are child
3040 break;
3041 }
3ddb9247 3042
f7c1d4e3
BH
3043 if(::arg().mustDo("daemon")) {
3044 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3045 L.toConsole(Logger::Critical);
f7c1d4e3 3046 daemonize();
a4241908 3047 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
f7c1d4e3
BH
3048 }
3049 signal(SIGUSR1,usr1Handler);
3050 signal(SIGUSR2,usr2Handler);
3051 signal(SIGPIPE,SIG_IGN);
810ff705 3052
a6414fdc 3053 checkOrFixFDS();
3ddb9247 3054
d1b28475
KM
3055#ifdef HAVE_LIBSODIUM
3056 if (sodium_init() == -1) {
3057 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3058 exit(99);
3059 }
3060#endif
3061
3afde9b2
PL
3062 openssl_thread_setup();
3063 openssl_seed();
3064
138435cb
BH
3065 int newgid=0;
3066 if(!::arg()["setgid"].empty())
3067 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3068 int newuid=0;
3069 if(!::arg()["setuid"].empty())
3070 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3071
f1d6a7ce
KM
3072 Utility::dropGroupPrivs(newuid, newgid);
3073
138435cb 3074 if (!::arg()["chroot"].empty()) {
75336810
PL
3075#ifdef HAVE_SYSTEMD
3076 char *ns;
3077 ns = getenv("NOTIFY_SOCKET");
3078 if (ns != nullptr) {
3079 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3080 exit(1);
3081 }
3082#endif
138435cb
BH
3083 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3084 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3085 exit(1);
3086 }
f0f3f0b0
PL
3087 else
3088 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
3089 }
3090
f0f3f0b0
PL
3091 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3092 if(!s_pidfname.empty())
3093 unlink(s_pidfname.c_str()); // remove possible old pid file
3094 writePid();
3095
3096 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3097
f1d6a7ce 3098 Utility::dropUserPrivs(newuid);
c0063e60 3099
49a699c4 3100 makeThreadPipes();
3ddb9247 3101
5d4dd7fe
BH
3102 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3103 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 3104 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
343257a4 3105
d705aad9
RG
3106 if (::arg().mustDo("snmp-agent")) {
3107 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3108 g_snmpAgent->run();
3109 }
3110
8fd25133 3111 const auto cpusMap = parseCPUMap();
c3828c03 3112 if(g_numThreads == 1) {
76698c6e 3113 L<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
3114#ifdef HAVE_SYSTEMD
3115 sd_notify(0, "READY=1");
3116#endif
8fd25133 3117 setCPUMap(cpusMap, 0, pthread_self());
76698c6e
BH
3118 recursorThread(0);
3119 }
3120 else {
3121 pthread_t tid;
c3828c03
BH
3122 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
3123 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 3124 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
8fd25133
RG
3125
3126 setCPUMap(cpusMap, n, tid);
76698c6e
BH
3127 }
3128 void* res;
6b6720de
PL
3129#ifdef HAVE_SYSTEMD
3130 sd_notify(0, "READY=1");
3131#endif
76698c6e 3132 pthread_join(tid, &res);
bb4bdbaf 3133 }
bb4bdbaf
BH
3134 return 0;
3135}
3136
d187038c 3137static void* recursorThread(void* ptr)
bb4bdbaf
BH
3138try
3139{
2e2cd8ec 3140 t_id=(int) (long) ptr;
49a699c4 3141 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 3142 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 3143 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
3144 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3145 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 3146 primeHints();
3ddb9247 3147
f26bf547 3148 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 3149
aa7929a3 3150#ifdef HAVE_PROTOBUF
f26bf547 3151 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
aa7929a3 3152#endif
49a699c4 3153 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 3154
674cf0f6
BH
3155 try {
3156 if(!::arg()["lua-dns-script"].empty()) {
9694e14f
AT
3157 t_pdl = std::make_shared<RecursorLua4>();
3158 t_pdl->loadFile(::arg()["lua-dns-script"]);
674cf0f6
BH
3159 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3160 }
674cf0f6
BH
3161 }
3162 catch(std::exception &e) {
3163 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 3164 _exit(99);
674cf0f6 3165 }
3ddb9247 3166
f8f243b0 3167 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 3168 if(ringsize) {
f26bf547 3169 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
f8f243b0 3170 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 3171 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 3172 else
3ddb9247 3173 t_remotes->set_capacity(ringsize);
f26bf547 3174 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3175 t_servfailremotes->set_capacity(ringsize);
f26bf547 3176 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3177 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 3178
f26bf547 3179 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3180 t_queryring->set_capacity(ringsize);
f26bf547 3181 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3182 t_servfailqueryring->set_capacity(ringsize);
92011b8f 3183 }
3ddb9247 3184
f26bf547 3185 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3ddb9247 3186
bb4bdbaf
BH
3187 PacketID pident;
3188
3189 t_fdm=getMultiplexer();
f3d1d67b 3190 if(!t_id) {
d07bf7ff 3191 if(::arg().mustDo("webserver")) {
30a1aa92 3192 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 3193 try {
1ce57618 3194 new RecursorWebServer(t_fdm);
8989097d
CH
3195 }
3196 catch(PDNSException &e) {
3197 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
3198 exit(99);
3199 }
f3d1d67b 3200 }
83252304 3201 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 3202 }
83252304 3203
49a699c4 3204 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 3205
810ff705 3206 if(g_useOneSocketPerThread) {
2573d4a6
RG
3207 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
3208 t_fdm->addReadFD(i->first, i->second);
810ff705
RG
3209 }
3210 }
3211 else {
3212 if(!g_weDistributeQueries || !t_id) { // if we distribute queries, only t_id = 0 listens
0670917b 3213 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
810ff705
RG
3214 t_fdm->addReadFD(i->first, i->second);
3215 }
3216 }
3217 }
3ddb9247 3218
b0b37121 3219 registerAllStats();
674cf0f6 3220 if(!t_id) {
674cf0f6
BH
3221 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3222 }
1bc3c142 3223
f7c1d4e3 3224 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 3225
f7c1d4e3 3226 bool listenOnTCP(true);
49a699c4 3227
2c78bd57 3228 time_t last_carbon=0;
3229 time_t carbonInterval=::arg().asNum("carbon-interval");
ac0995bb 3230 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 3231 for(;;) {
ac0e821b 3232 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 3233
3427fa8a
BH
3234 if(!(counter%500)) {
3235 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
3236 }
3237
d2392145 3238 if(!(counter%55)) {
d8f6d49f 3239 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 3240 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 3241
f7c1d4e3 3242 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 3243 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 3244 if(g_logCommonErrors)
cd989c87 3245 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 3246 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
3247 }
3248 }
3ddb9247 3249
f7c1d4e3
BH
3250 counter++;
3251
3427fa8a 3252 if(!t_id && statsWanted) {
f7c1d4e3
BH
3253 doStats();
3254 }
3255
3256 Utility::gettimeofday(&g_now, 0);
2c78bd57 3257
3258 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
3259 MT->makeThread(doCarbonDump, 0);
3260 last_carbon = g_now.tv_sec;
3261 }
3262
bb4bdbaf 3263 t_fdm->run(&g_now);
3ea54bf0 3264 // 'run' updates g_now for us
f7c1d4e3 3265
b8ef5c5c 3266 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 3267 if(listenOnTCP) {
3268 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3269 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3270 t_fdm->removeReadFD(*i);
3271 listenOnTCP=false;
3272 }
f7c1d4e3 3273 }
5c889cf5 3274 else {
3275 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3276 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3277 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3278 listenOnTCP=true;
3279 }
f7c1d4e3
BH
3280 }
3281 }
3282 }
3283}
3f81d239 3284catch(PDNSException &ae) {
bb4bdbaf
BH
3285 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3286 return 0;
3287}
3288catch(std::exception &e) {
3289 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3290 return 0;
3291}
3292catch(...) {
3293 L<<Logger::Error<<"any other exception in main: "<<endl;
3294 return 0;
3295}
3296
51e2144e 3297
3ddb9247 3298int main(int argc, char **argv)
288f4aa9 3299{
dbd23fc2
BH
3300 g_argc = argc;
3301 g_argv = argv;
5e3de507 3302 g_stats.startupTime=time(0);
3e135495 3303 versionSetProduct(ProductRecursor);
8a63d3ce 3304 reportBasicTypes();
0007c2e5 3305 reportOtherTypes();
ea634573 3306
22030c37 3307 int ret = EXIT_SUCCESS;
caa6eefa 3308
288f4aa9 3309 try {
f888311c 3310 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 3311 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 3312 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 3313 ::arg().set("local-port","port to listen on")="53";
32252594 3314 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 3315 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 3316 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 3317 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 3318 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
d3f809bf 3319 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 3320 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 3321 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 3322 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 3323 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 3324 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
3325 ::arg().set("chroot","switch to chroot jail")="";
3326 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3327 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 3328 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 3329 ::arg().set("threads", "Launch this number of threads")="2";
adabfcb9 3330 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 3331 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 3332 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976
CH
3333 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3334 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3335 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3336 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
3337 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3338 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3339 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 3340 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
cc08b5a9 3341 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 3342 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 3343 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
0ec489bf 3344 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 3345 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 3346 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 3347 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
3348 ::arg().set("socket-owner","Owner of socket")="";
3349 ::arg().set("socket-group","Group of socket")="";
3350 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 3351
f0f3f0b0 3352 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
3353 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3354 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 3355 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 3356 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 3357 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 3358 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 3359 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 3360 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 3361 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 3362 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 3363 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 3364 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 3365 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 3366 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 3367 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
54c4c0d8 3368 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
92011b8f 3369 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 3370 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 3371 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 3372 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 3373 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 3374 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 3375 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 3376 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 3377 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 3378 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 3379 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 3380 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 3381
5605c067 3382 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
3383 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3384 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 3385 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 3386 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 3387 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 3388 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 3389 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 3390 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 3391 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 3392 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3393 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3f975863 3394 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 3395 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 3396 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 3397 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 3398 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 3399 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 3400 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 3401 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 3402 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
a09a8ce0 3403 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 3404 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 3405 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 3406 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 3407 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 3408 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
a09a8ce0 3409
68e6df3c 3410 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 3411 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 3412
3413 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 3414
d705aad9 3415 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 3416 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 3417
0735b17e 3418 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 3419 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 3420
8fd25133
RG
3421 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3422
98d36505
RG
3423 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3424
2e3d8a19 3425 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 3426 ::arg().setCmd("version","Print version string");
d5141417 3427 ::arg().setCmd("config","Output blank configuration");
f27e6356 3428 L.toConsole(Logger::Info);
2e3d8a19 3429 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 3430
2d733c0f
CH
3431 string configname=::arg()["config-dir"]+"/recursor.conf";
3432 if(::arg()["config-name"]!="") {
3433 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 3434 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
3435 }
3436 cleanSlashes(configname);
5124de27 3437
577cf284
BH
3438 if(::arg().mustDo("config")) {
3439 cout<<::arg().configstring()<<endl;
3440 exit(0);
3441 }
3442
3ddb9247 3443 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
3444 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3445
2e3d8a19 3446 ::arg().parse(argc,argv);
c836dc19 3447
f0f3f0b0
PL
3448 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3449 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3450 exit(EXIT_FAILURE);
3451 }
3452
3453 if (::arg()["socket-dir"].empty()) {
3454 if (::arg()["chroot"].empty())
3455 ::arg().set("socket-dir") = LOCALSTATEDIR;
3456 else
3457 ::arg().set("socket-dir") = "/";
3458 }
3459
2e3d8a19 3460 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 3461
61d74169 3462 if(::arg().asNum("threads")==1)
3463 ::arg().set("pdns-distributes-queries")="no";
3464
2e3d8a19 3465 if(::arg().mustDo("help")) {
ff5ba4f9
WA
3466 cout<<"syntax:"<<endl<<endl;
3467 cout<<::arg().helpstring(::arg()["help"])<<endl;
3468 exit(0);
b636533b 3469 }
5e3de507 3470 if(::arg().mustDo("version")) {
ba1a571d 3471 showProductVersion();
3613a51c 3472 showBuildConfiguration();
67076869 3473 exit(0);
5e3de507 3474 }
b636533b 3475
34162f8f 3476 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 3477
34162f8f
CH
3478 if (logUrgency < Logger::Error)
3479 logUrgency = Logger::Error;
f48d7b65 3480 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3481 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3482 }
34162f8f
CH
3483 L.setLoglevel(logUrgency);
3484 L.toConsole(logUrgency);
3485
f7c1d4e3 3486 serviceMain(argc, argv);
288f4aa9 3487 }
3f81d239 3488 catch(PDNSException &ae) {
c836dc19 3489 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 3490 ret=EXIT_FAILURE;
288f4aa9 3491 }
fdbf35ac 3492 catch(std::exception &e) {
c836dc19 3493 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 3494 ret=EXIT_FAILURE;
288f4aa9
BH
3495 }
3496 catch(...) {
c836dc19 3497 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 3498 ret=EXIT_FAILURE;
288f4aa9 3499 }
3ddb9247 3500
22030c37 3501 return ret;
288f4aa9 3502}