]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
turn on root-nx-trust by default, and document that
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
32cb6fd4 3 Copyright (C) 2003 - 2016 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
3ddb9247 6 it under the terms of the GNU General Public License version 2
f28307ad 7 as published by the Free Software Foundation
288f4aa9 8
f782fe38
MH
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
288f4aa9
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
288f4aa9 21*/
caa6eefa 22
870a0fe4
AT
23#ifdef HAVE_CONFIG_H
24#include "config.h"
25#endif
3e61e7f7 26
76473b92
KM
27#include <netdb.h>
28#include <sys/stat.h>
29#include <unistd.h>
fa8fd4d2 30
2470b36e 31#include "ws-recursor.hh"
49a699c4 32#include <pthread.h>
3ea54bf0 33#include "recpacketcache.hh"
3ddb9247 34#include "utility.hh"
51e2144e 35#include "dns_random.hh"
3afde9b2 36#include "opensslsigners.hh"
288f4aa9
BH
37#include <iostream>
38#include <errno.h>
81859ba5 39#include <boost/static_assert.hpp>
288f4aa9
BH
40#include <map>
41#include <set>
97bb160b 42#include "recursor_cache.hh"
38c9ceaa 43#include "cachecleaner.hh"
288f4aa9 44#include <stdio.h>
c75a6a9e 45#include <signal.h>
288f4aa9 46#include <stdlib.h>
bb4bdbaf 47#include "misc.hh"
288f4aa9
BH
48#include "mtasker.hh"
49#include <utility>
288f4aa9
BH
50#include "arguments.hh"
51#include "syncres.hh"
88def049
BH
52#include <fcntl.h>
53#include <fstream>
3e61e7f7 54#include "sortlist.hh"
55extern SortList g_sortlist;
5c633640
BH
56#include "sstuff.hh"
57#include <boost/tuple/tuple.hpp>
58#include <boost/tuple/tuple_comparison.hpp>
72df400f 59#include <boost/shared_array.hpp>
7f1fa77d 60#include <boost/function.hpp>
5605c067 61#include <boost/algorithm/string.hpp>
8f7473d7 62#ifdef MALLOC_TRACE
63#include "malloctrace.hh"
64#endif
40a3dd64 65#include <netinet/tcp.h>
ea634573
BH
66#include "dnsparser.hh"
67#include "dnswriter.hh"
68#include "dnsrecords.hh"
f814d7c8 69#include "zoneparser-tng.hh"
1d5b3ce6 70#include "rec_channel.hh"
aaacf7f2 71#include "logger.hh"
c8ddb7c2 72#include "iputils.hh"
09e6702a 73#include "mplexer.hh"
c038218b 74#include "config.h"
808c5ef7 75#include "lua-recursor4.hh"
ba1a571d 76#include "version.hh"
79332bff 77#include "responsestats.hh"
d67620e4 78#include "secpoll-recursor.hh"
c5c066bf 79#include "dnsname.hh"
644dd1da 80#include "filterpo.hh"
81#include "rpzloader.hh"
b3f0ed10 82#include "validate-recursor.hh"
f3c18728 83#include "rec-lua-conf.hh"
5c3b5e7f 84#include "ednsoptions.hh"
85c7ca75 85#include "gettime.hh"
f3c18728 86
d9d3f9c1 87#include "rec-protobuf.hh"
aa7929a3 88
6b6720de
PL
89#ifdef HAVE_SYSTEMD
90#include <systemd/sd-daemon.h>
91#endif
92
bb4bdbaf 93__thread FDMultiplexer* t_fdm;
674cf0f6 94__thread unsigned int t_id;
09e6702a 95unsigned int g_maxTCPPerClient;
5b0ddd18 96unsigned int g_networkTimeoutMsec;
08f3f638 97uint64_t g_latencyStatSize;
09e6702a 98bool g_logCommonErrors;
e661a20b 99bool g_anyToTcp;
b33c2462 100uint16_t g_udpTruncationThreshold, g_outgoingEDNSBufsize;
a3e7b735 101__thread shared_ptr<RecursorLua4>* t_pdl;
b3adda56 102bool g_lowercaseOutgoing;
60c8afa8 103
104__thread addrringbuf_t* t_remotes, *t_servfailremotes, *t_largeanswerremotes;
105
c5c066bf 106__thread boost::circular_buffer<pair<DNSName, uint16_t> >* t_queryring, *t_servfailqueryring;
77499b05 107__thread shared_ptr<Regex>* t_traceRegex;
674cf0f6 108
aa7929a3
RG
109#ifdef HAVE_PROTOBUF
110__thread boost::uuids::random_generator* t_uuidGenerator;
111#endif
112
376effcf 113NetmaskGroup g_ednssubnets;
114SuffixMatchNode g_ednsdomains;
115
d7dae798
BH
116RecursorControlChannel s_rcc; // only active in thread 0
117
118// for communicating with our threads
49a699c4
BH
119struct ThreadPipeSet
120{
121 int writeToThread;
122 int readToThread;
123 int writeFromThread;
124 int readFromThread;
125};
3ea54bf0 126
d7dae798 127vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
5c633640 128
d7dae798 129SyncRes::domainmap_t* g_initialDomainMap; // new threads needs this to be setup
49a699c4
BH
130
131#include "namespaces.hh"
3ea54bf0 132
49a699c4 133__thread MemRecursorCache* t_RC;
16beeaa4 134__thread RecursorPacketCache* t_packetCache;
1d5b3ce6
BH
135RecursorStats g_stats;
136bool g_quiet;
49a699c4 137
1bc3c142
BH
138bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
139
41942bb3 140__thread NetmaskGroup* t_allowFrom;
49a699c4
BH
141static NetmaskGroup* g_initialAllowFrom; // new thread needs to be setup with this
142
eb5bae86 143NetmaskGroup* g_dontQuery;
2d733c0f 144string s_programname="pdns_recursor";
49a699c4 145
40a3dd64
BH
146typedef vector<int> tcpListenSockets_t;
147tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
3159c9ef 148int g_tcpTimeout;
85c32340 149unsigned int g_maxMThreads;
183eb877 150__thread struct timeval g_now; // timestamp, updated (too) frequently
84433b79 151typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
152listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
cbc03320 153set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
18af64a8 154
d7dae798
BH
155__thread MT_t* MT; // the big MTasker
156
f8f243b0 157unsigned int g_numThreads, g_numWorkerThreads;
c3828c03 158
// Networks that are local to this host or site.
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// Bad Nets: networks that may not be considered a valid destination, taken from both
//   http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
//   http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Concatenation of both lists above.
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 167
d7dae798 168//! used to send information to a newborn mthread
ea634573 169struct DNSComboWriter {
3ddb9247 170 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now),
232f0877 171 d_tcp(false), d_socket(-1)
ea634573
BH
172 {}
173 MOADNSParser d_mdp;
00c9b8c1 174 void setRemote(const ComboAddress* sa)
ea634573 175 {
37d3f960 176 d_remote=*sa;
ea634573
BH
177 }
178
b71b60ee 179 void setLocal(const ComboAddress& sa)
180 {
181 d_local=sa;
182 }
183
184
ea634573
BH
185 void setSocket(int sock)
186 {
187 d_socket=sock;
188 }
a1754c6a
BH
189
190 string getRemote() const
191 {
37d3f960 192 return d_remote.toString();
a1754c6a
BH
193 }
194
c9e9e5e0 195 struct timeval d_now;
b71b60ee 196 ComboAddress d_remote, d_local;
aa7929a3
RG
197#ifdef HAVE_PROTOBUF
198 boost::uuids::uuid d_uuid;
02b47f43 199 Netmask d_ednssubnet;
aa7929a3 200#endif
ea634573
BH
201 bool d_tcp;
202 int d_socket;
a82f68f0 203 int d_tag{0};
49a3500d 204 string d_query;
cd989c87 205 shared_ptr<TCPConnection> d_tcpConnection;
e8340d27 206 vector<pair<uint16_t, string> > d_ednsOpts;
02b47f43 207 std::vector<std::string> d_policyTags;
ea634573
BH
208};
209
210
288f4aa9
BH
211ArgvMap &arg()
212{
213 static ArgvMap theArg;
214 return theArg;
215}
4ef015cd 216
09e6702a 217
d8f6d49f 218void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 219
50c81227 220// -1 is error, 0 is timeout, 1 is success
3ddb9247 221int asendtcp(const string& data, Socket* sock)
5c633640
BH
222{
223 PacketID pident;
224 pident.sock=sock;
225 pident.outMSG=data;
3ddb9247 226
bb4bdbaf 227 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 228 string packet;
5c633640 229
5b0ddd18 230 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 231
9170fbaf 232 if(!ret || ret==-1) { // timeout
bb4bdbaf 233 t_fdm->removeWriteFD(sock->getHandle());
5c633640 234 }
50c81227
BH
235 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
236 return -1;
237 }
9170fbaf 238 return ret;
5c633640
BH
239}
240
d8f6d49f 241void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 242
9170fbaf 243// -1 is error, 0 is timeout, 1 is success
a683e8bd 244int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 245{
50c81227 246 data.clear();
5c633640
BH
247 PacketID pident;
248 pident.sock=sock;
249 pident.inNeeded=len;
825fa717 250 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 251 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 252
bb4bdbaf 253 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 254 if(!ret || ret==-1) { // timeout
bb4bdbaf 255 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 256 }
50c81227
BH
257 else if(data.empty()) {// error, EOF or other
258 return -1;
259 }
260
9170fbaf 261 return ret;
288f4aa9
BH
262}
263
fba1e944 264void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 265{
fba1e944 266 PacketID pident=*any_cast<PacketID>(&var);
4465e941 267 char resp[512];
a683e8bd 268 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
4465e941 269 t_fdm->removeReadFD(fd);
270 if(ret >= 0) {
a683e8bd 271 string data(resp, (size_t) ret);
fba1e944 272 MT->sendEvent(pident, &data);
4465e941 273 }
274 else {
fba1e944 275 string empty;
276 MT->sendEvent(pident, &empty);
277 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 278 }
279}
fba1e944 280string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 281{
4465e941 282 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
283 s.setNonBlocking();
284 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
285
286 s.bind(local);
287 s.connect(dest);
4465e941 288 s.send(query);
289
290 PacketID pident;
291 pident.sock=&s;
292 pident.type=0;
fba1e944 293 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 294
295 string data;
fba1e944 296
4465e941 297 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 298
4465e941 299 if(!ret || ret==-1) { // timeout
4465e941 300 t_fdm->removeReadFD(s.getHandle());
301 }
302 else if(data.empty()) {// error, EOF or other
fba1e944 303 // we could special case this
4465e941 304 return data;
305 }
4465e941 306 return data;
307}
308
309
3ddb9247 310vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
046c5a5d 311const ComboAddress g_local4("0.0.0.0"), g_local6("::");
1652a63e 312
d7dae798 313//! pick a random query local address
1652a63e 314ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 315{
1652a63e 316 ComboAddress ret;
5a38281c 317 if(family==AF_INET) {
3ddb9247 318 if(g_localQueryAddresses4.empty())
1652a63e 319 ret = g_local4;
3ddb9247 320 else
1652a63e
BH
321 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
322 ret.sin4.sin_port = htons(port);
5a38281c
BH
323 }
324 else {
325 if(g_localQueryAddresses6.empty())
1652a63e
BH
326 ret = g_local6;
327 else
328 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 329
1652a63e 330 ret.sin6.sin6_port = htons(port);
5a38281c 331 }
1652a63e 332 return ret;
5a38281c 333}
4ef015cd 334
d8f6d49f 335void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 336
d7dae798
BH
337void setSocketBuffer(int fd, int optname, uint32_t size)
338{
339 uint32_t psize=0;
340 socklen_t len=sizeof(psize);
3ddb9247 341
d7dae798
BH
342 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
343 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 344 return;
d7dae798
BH
345 }
346
347 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
c057bfaa 348 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
349}
350
351
352static void setSocketReceiveBuffer(int fd, uint32_t size)
353{
354 setSocketBuffer(fd, SO_RCVBUF, size);
355}
356
357static void setSocketSendBuffer(int fd, uint32_t size)
358{
359 setSocketBuffer(fd, SO_SNDBUF, size);
360}
361
362
4ef015cd
BH
363// you can ask this class for a UDP socket to send a query from
364// this socket is not yours, don't even think about deleting it
365// but after you call 'returnSocket' on it, don't assume anything anymore
366class UDPClientSocks
367{
4ef015cd 368 unsigned int d_numsocks;
4ef015cd 369public:
e2642526 370 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
371 {
372 }
373
996c89cc 374 typedef set<int> socks_t;
4ef015cd
BH
375 socks_t d_socks;
376
2ee280cf 377 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 378 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 379 {
d8f6d49f
BH
380 *fd=makeClientSocket(toaddr.sin4.sin_family);
381 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 382 return -2;
d8f6d49f
BH
383
384 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
385 int err = errno;
41ff43f8 386 // returnSocket(*fd);
3897b9e1 387 closesocket(*fd);
d8f6d49f 388 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 389 return -2;
998a4334 390 return -1;
d8f6d49f 391 }
998a4334 392
d8f6d49f 393 d_socks.insert(*fd);
998a4334 394 d_numsocks++;
d8f6d49f 395 return 0;
4ef015cd
BH
396 }
397
095c3045
BH
398 void returnSocket(int fd)
399 {
400 socks_t::iterator i=d_socks.find(fd);
34801ab1 401 if(i==d_socks.end()) {
335da0ba 402 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 403 }
bb4bdbaf 404 returnSocketLocked(i);
095c3045
BH
405 }
406
4ef015cd 407 // return a socket to the pool, or simply erase it
bb4bdbaf 408 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 409 {
600fc20b 410 if(i==d_socks.end()) {
3f81d239 411 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 412 }
80baf329 413 try {
bb4bdbaf 414 t_fdm->removeReadFD(*i);
80baf329
BH
415 }
416 catch(FDMultiplexerException& e) {
bb4bdbaf 417 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 418 }
3897b9e1 419 closesocket(*i);
3ddb9247 420
998a4334
BH
421 d_socks.erase(i++);
422 --d_numsocks;
4ef015cd 423 }
d8f6d49f
BH
424
425 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 426 static int makeClientSocket(int family)
d8f6d49f 427 {
a683e8bd 428 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 429
d8f6d49f
BH
430 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
431 return ret;
3ddb9247
PD
432
433 if(ret<0)
335da0ba 434 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 435
7eb73ffa 436 // setCloseOnExec(ret); // we're not going to exec
5a38281c 437
d8f6d49f 438 int tries=10;
3aa91c3e 439 ComboAddress sin;
d8f6d49f 440 while(--tries) {
1652a63e 441 uint16_t port;
3ddb9247 442
d8f6d49f 443 if(tries==1) // fall back to kernel 'random'
4957a608 444 port = 0;
1652a63e
BH
445 else
446 port = 1025 + dns_random(64510);
5a38281c 447
3aa91c3e 448 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 449
3ddb9247 450 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 451 break;
d8f6d49f
BH
452 }
453 if(!tries)
3aa91c3e 454 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 455
3897b9e1 456 setNonBlocking(ret);
d8f6d49f
BH
457 return ret;
458 }
49a699c4
BH
459};
460
461static __thread UDPClientSocks* t_udpclientsocks;
4ef015cd 462
288f4aa9 463/* these two functions are used by LWRes */
34801ab1 464// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 465int asendto(const char *data, size_t len, int flags,
3ddb9247 466 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 467{
34801ab1
BH
468
469 PacketID pident;
787e5eab
BH
470 pident.domain = domain;
471 pident.remote = toaddr;
472 pident.type = qtype;
34801ab1
BH
473
474 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
475 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
476
477 for(; chain.first != chain.second; chain.first++) {
478 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 479 /*
4665c31e
BH
480 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
481 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 482 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 483 */
34801ab1
BH
484 chain.first->key.chain.insert(id); // we can chain
485 *fd=-1; // gets used in waitEvent / sendEvent later on
486 return 1;
487 }
488 }
489
49a699c4 490 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
491 if(ret < 0)
492 return ret;
34801ab1 493
998a4334
BH
494 pident.fd=*fd;
495 pident.id=id;
3ddb9247 496
bb4bdbaf
BH
497 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
498 ret = send(*fd, data, len, 0);
499
5b0ddd18 500 int tmp = errno;
bb4bdbaf 501
7302ed0a 502 if(ret < 0)
49a699c4 503 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 504
5b0ddd18 505 errno = tmp; // this is for logging purposes only
7302ed0a 506 return ret;
288f4aa9
BH
507}
508
9170fbaf 509// -1 is error, 0 is timeout, 1 is success
a683e8bd 510int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 511 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 512{
0d5f0a9f 513 static optional<unsigned int> nearMissLimit;
3ddb9247 514 if(!nearMissLimit)
0d5f0a9f
BH
515 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
516
288f4aa9 517 PacketID pident;
4ef015cd 518 pident.fd=fd;
288f4aa9 519 pident.id=id;
0d5f0a9f 520 pident.domain=domain;
787e5eab 521 pident.type = qtype;
996c89cc 522 pident.remote=fromaddr;
b636533b 523
288f4aa9 524 string packet;
5b0ddd18 525 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 526
9170fbaf 527 if(ret > 0) {
996c89cc 528 if(packet.empty()) // means "error"
3ddb9247 529 return -1;
998a4334 530
a683e8bd 531 *d_len=packet.size();
9170fbaf 532 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 533 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 534 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 535 g_stats.spoofCount++;
35ce8576
BH
536 return -1;
537 }
288f4aa9 538 }
09e6702a 539 else {
34801ab1 540 if(fd >= 0)
49a699c4 541 t_udpclientsocks->returnSocket(fd);
09e6702a 542 }
9170fbaf 543 return ret;
288f4aa9
BH
544}
545
aa4e4cbf 546
87a5ea63 547string s_pidfname;
88def049
BH
548static void writePid(void)
549{
191f2e47 550 if(!::arg().mustDo("write-pid"))
551 return;
18e7758c 552 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 553 if(of)
705f31ae 554 of<< Utility::getpid() <<endl;
88def049 555 else
c057bfaa 556 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
557}
558
bd0289fc
BH
559typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
560tcpClientCounts_t __thread* t_tcpClientCounts;
0e9d9ce2 561
cd989c87 562TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
563{
564 ++s_currentConnections;
cd989c87 565 (*t_tcpClientCounts)[d_remote]++;
0e408828 566}
cd989c87
BH
567
568TCPConnection::~TCPConnection()
0e408828 569{
3ddb9247 570 if(closesocket(d_fd) < 0)
cd989c87 571 unixDie("closing socket for TCPConnection");
3ddb9247 572 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 573 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 574 --s_currentConnections;
0e408828 575}
0e9d9ce2 576
3ddb9247 577AtomicCounter TCPConnection::s_currentConnections;
d8f6d49f 578void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 579
92011b8f 580// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
c5c066bf 581void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 582{
92011b8f 583 if(packetsize > 1000 && t_largeanswerremotes)
584 t_largeanswerremotes->push_back(remote);
2cc13433
BH
585 switch(res) {
586 case RCode::ServFail:
92011b8f 587 if(t_servfailremotes) {
588 t_servfailremotes->push_back(remote);
589 if(query) // packet cache
590 t_servfailqueryring->push_back(make_pair(*query, qtype));
591 }
2cc13433
BH
592 g_stats.servFails++;
593 break;
594 case RCode::NXDomain:
595 g_stats.nxDomains++;
596 break;
597 case RCode::NoError:
598 g_stats.noErrors++;
599 break;
600 }
601}
602
a903b39c 603static string makeLoginfo(DNSComboWriter* dc)
604try
605{
5ad5bb7d 606 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
a903b39c 607}
608catch(...)
609{
610 return "Exception making error message for exception";
611}
612
aa7929a3 613#ifdef HAVE_PROTOBUF
e1c8a4bb 614static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::string appliedPolicy, const std::vector<std::string>& policyTags)
aa7929a3 615{
e1c8a4bb
RG
616 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
617 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
618 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
d9d3f9c1 619 message.setEDNSSubnet(ednssubnet);
02b47f43 620
02b47f43 621 if (!appliedPolicy.empty()) {
d9d3f9c1 622 message.setAppliedPolicy(appliedPolicy);
02b47f43
RG
623 }
624 if (!policyTags.empty()) {
d9d3f9c1 625 message.setPolicyTags(policyTags);
02b47f43 626 }
aa7929a3 627
d9d3f9c1 628// cerr <<message.toDebugString()<<endl;
aa7929a3 629 std::string str;
d9d3f9c1 630 message.serialize(str);
aa7929a3 631 logger->queueData(str);
aa7929a3
RG
632}
633
d9d3f9c1 634static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
aa7929a3 635{
d9d3f9c1 636// cerr <<message.toDebugString()<<endl;
aa7929a3 637 std::string str;
d9d3f9c1 638 message.serialize(str);
aa7929a3 639 logger->queueData(str);
aa7929a3
RG
640}
641#endif
642
288f4aa9
BH
643void startDoResolve(void *p)
644{
7b1469bb 645 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 646 try {
92011b8f 647 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
648
b18ace73 649 uint32_t maxanswersize= dc->d_tcp ? 65535 : min((uint16_t) 512, g_udpTruncationThreshold);
7f7b8d55 650 EDNSOpts edo;
8e079f3a 651 bool haveEDNS=false;
652 if(getEDNSOpts(dc->d_mdp, &edo)) {
653 if(!dc->d_tcp)
654 maxanswersize = min(edo.d_packetsize, g_udpTruncationThreshold);
e8340d27 655 dc->d_ednsOpts = edo.d_options;
8e079f3a 656 haveEDNS=true;
10321a98 657 }
e325f20c 658 vector<DNSRecord> ret;
ea634573 659 vector<uint8_t> packet;
b23b8614 660
ad42489c 661 auto luaconfsLocal = g_luaconfs.getLocal();
83971888 662 std::string appliedPolicy;
d9d3f9c1 663 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
aa7929a3 664#ifdef HAVE_PROTOBUF
d9d3f9c1 665 if (luaconfsLocal->protobufServer) {
e1c8a4bb
RG
666 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
667 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
668 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
d9d3f9c1
RG
669 pbMessage.setEDNSSubnet(dc->d_ednssubnet);
670 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
671 }
672#endif /* HAVE_PROTOBUF */
ad42489c 673
3ddb9247 674 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
675
676 pw.getHeader()->aa=0;
677 pw.getHeader()->ra=1;
c154c8a4 678 pw.getHeader()->qr=1;
bb4bdbaf 679 pw.getHeader()->tc=0;
ea634573 680 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 681 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 682 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 683
1059837e 684 // DO NOT MOVE THIS CODE UP - DNSPacketWriter needs to get the original-cased version
b3adda56
PD
685 if (g_lowercaseOutgoing)
686 dc->d_mdp.d_qname = DNSName(toLower(dc->d_mdp.d_qname.toString()));
687
904d3219
PD
688 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
689
690 SyncRes sr(dc->d_now);
2e921ec6 691 bool DNSSECOK=false;
3457a2a0 692 if(t_pdl) {
693 sr.setLuaEngine(*t_pdl);
4ea94941 694 sr.d_requestor=dc->d_remote;
3457a2a0 695 }
2e921ec6 696
9eec8c98 697 if(g_dnssecmode != DNSSECMode::Off) {
2e921ec6 698 sr.d_doDNSSEC=true;
9eec8c98
PL
699
700 // Does the requestor want DNSSEC records?
701 if(edo.d_Z & EDNSOpts::DNSSECOK) {
702 DNSSECOK=true;
703 g_stats.dnssecQueries++;
704 }
705 } else {
706 // Ignore the client-set CD flag
707 pw.getHeader()->cd=0;
5b9853c9 708 }
57769f13 709
904d3219
PD
710 bool tracedQuery=false; // we could consider letting Lua know about this too
711 bool variableAnswer = false;
9fc36e90 712 bool shouldNotValidate = false;
904d3219 713
56b4d21b 714 int res;
39ec5d29 715 DNSFilterEngine::Policy dfepol;
716 DNSRecord spoofed;
e661a20b 717 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
718 pw.getHeader()->tc = 1;
719 res = 0;
720 variableAnswer = true;
e661a20b
PD
721 goto sendit;
722 }
723
c5c066bf 724 if(t_traceRegex->get() && (*t_traceRegex)->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
725 sr.setLogMode(SyncRes::Store);
726 tracedQuery=true;
727 }
3ddb9247 728
8f7473d7 729
976ec823 730 if(!g_quiet || tracedQuery) {
461df9d2 731 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 732 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
969cbf08 733#ifdef HAVE_PROTOBUF
02b47f43
RG
734 if(!dc->d_ednssubnet.empty()) {
735 L<<" (ecs "<<dc->d_ednssubnet.toString()<<")";
976ec823 736 }
969cbf08 737#endif
976ec823 738 L<<endl;
739 }
c75a6a9e 740
fededf47 741 sr.setId(MT->getTid());
67828389 742 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
743 sr.setCacheOnly();
744
84433b79 745
3ddb9247 746 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
e325f20c 747
ad42489c 748 dfepol = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote);
39ec5d29 749
750 switch(dfepol.d_kind) {
751 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 752 break;
39ec5d29 753 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 754 g_stats.policyDrops++;
755 delete dc;
756 dc=0;
757 return;
39ec5d29 758 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 759 res=RCode::NXDomain;
83971888 760 appliedPolicy=dfepol.d_name;
644dd1da 761 goto haveAnswer;
762
39ec5d29 763 case DNSFilterEngine::PolicyKind::NODATA:
764 res=RCode::NoError;
83971888 765 appliedPolicy=dfepol.d_name;
39ec5d29 766 goto haveAnswer;
767
768 case DNSFilterEngine::PolicyKind::Custom:
644dd1da 769 res=RCode::NoError;
39ec5d29 770 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 771 spoofed.d_type=dfepol.d_custom->getType();
3876ee44 772 spoofed.d_ttl = dfepol.d_ttl;
39ec5d29 773 spoofed.d_class = 1;
774 spoofed.d_content = dfepol.d_custom;
589ad24b 775 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 776 ret.push_back(spoofed);
83971888 777 appliedPolicy=dfepol.d_name;
644dd1da 778 goto haveAnswer;
779
39ec5d29 780
781 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 782 if(!dc->d_tcp) {
783 res=RCode::NoError;
784 pw.getHeader()->tc=1;
83971888 785 appliedPolicy=dfepol.d_name;
644dd1da 786 goto haveAnswer;
787 }
788 break;
789 }
790
02b47f43 791 if(!t_pdl->get() || !(*t_pdl)->preresolve(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_tcp, ret, dc->d_ednsOpts.empty() ? 0 : &dc->d_ednsOpts, dc->d_tag, &appliedPolicy, &dc->d_policyTags, res, &variableAnswer)) {
44971ca0
PD
792 try {
793 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 794 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
795 }
796 catch(ImmediateServFailException &e) {
854d44e3 797 if(g_logCommonErrors)
798 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
799 res = RCode::ServFail;
800 }
4485aa35 801
ad42489c 802 dfepol = luaconfsLocal->dfe.getPostPolicy(ret);
39ec5d29 803 switch(dfepol.d_kind) {
804 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 805 break;
39ec5d29 806 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 807 g_stats.policyDrops++;
808 delete dc;
809 dc=0;
810 return;
39ec5d29 811 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 812 ret.clear();
813 res=RCode::NXDomain;
83971888 814 appliedPolicy=dfepol.d_name;
644dd1da 815 goto haveAnswer;
816
39ec5d29 817 case DNSFilterEngine::PolicyKind::NODATA:
644dd1da 818 ret.clear();
819 res=RCode::NoError;
83971888 820 appliedPolicy=dfepol.d_name;
644dd1da 821 goto haveAnswer;
822
39ec5d29 823 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 824 if(!dc->d_tcp) {
825 ret.clear();
826 res=RCode::NoError;
827 pw.getHeader()->tc=1;
83971888 828 appliedPolicy=dfepol.d_name;
644dd1da 829 goto haveAnswer;
830 }
831 break;
39ec5d29 832
833 case DNSFilterEngine::PolicyKind::Custom:
ad42489c 834 ret.clear();
39ec5d29 835 res=RCode::NoError;
836 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 837 spoofed.d_type=dfepol.d_custom->getType();
3876ee44 838 spoofed.d_ttl = dfepol.d_ttl;
39ec5d29 839 spoofed.d_class = 1;
840 spoofed.d_content = dfepol.d_custom;
589ad24b 841 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 842 ret.push_back(spoofed);
83971888 843 appliedPolicy=dfepol.d_name;
39ec5d29 844 goto haveAnswer;
644dd1da 845 }
a3e7b735 846
674cf0f6 847 if(t_pdl->get()) {
bd53ea9d 848 if(res == RCode::NoError) {
e325f20c 849 auto i=ret.cbegin();
850 for(; i!= ret.cend(); ++i)
e693ff5a 851 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
232f0877 852 break;
e325f20c 853 if(i == ret.cend())
c672b54a 854 (*t_pdl)->nodata(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_tcp, ret, res, &variableAnswer);
a3e7b735 855 }
856 else if(res == RCode::NXDomain)
c672b54a 857 (*t_pdl)->nxdomain(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_tcp, ret, res, &variableAnswer);
644dd1da 858
a3e7b735 859
02b47f43 860 (*t_pdl)->postresolve(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_tcp, ret, &appliedPolicy, &dc->d_policyTags, res, &variableAnswer);
d2322a5e 861 }
4485aa35 862 }
644dd1da 863 haveAnswer:;
3e8216c8 864 if(res == PolicyDecision::DROP) {
e9c2ad3a 865 g_stats.policyDrops++;
ae7e77ad 866 delete dc;
867 dc=0;
868 return;
3ddb9247 869 }
3e8216c8 870 if(tracedQuery || res == PolicyDecision::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 871 {
85ffbc53
PD
872 string trace(sr.getTrace());
873 if(!trace.empty()) {
874 vector<string> lines;
875 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 876 for(const string& line : lines) {
85ffbc53
PD
877 if(!line.empty())
878 L<<Logger::Warning<< line << endl;
879 }
880 }
881 }
3ddb9247 882
b3f0ed10 883 if(res == PolicyDecision::PASS) { // XXX what does this MEAN? Why servfail on PASS?
0fe1d080
PD
884 pw.getHeader()->rcode=RCode::ServFail;
885 // no commit here, because no record
886 g_stats.servFails++;
887 }
288f4aa9 888 else {
ea634573 889 pw.getHeader()->rcode=res;
92011b8f 890
f3fe4ae6 891 // Does the validation mode or query demand validation?
9fc36e90 892 if(!shouldNotValidate && (g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process))) {
b25cae9a 893 try {
f3fe4ae6 894 if(sr.doLog()) {
5fc44cd2 895 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
2e921ec6 896 }
b25cae9a 897
898 auto state=validateRecords(ret);
899 if(state == Secure) {
2e921ec6 900 if(sr.doLog()) {
5fc44cd2 901 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
2e921ec6 902 }
b25cae9a 903
904 // Is the query source interested in the value of the ad-bit?
885c8881 905 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 906 pw.getHeader()->ad=1;
907 }
908 else if(state == Insecure) {
f3fe4ae6 909 if(sr.doLog()) {
5fc44cd2 910 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
12ce523e 911 }
b25cae9a 912
913 pw.getHeader()->ad=0;
f3fe4ae6 914 }
b25cae9a 915 else if(state == Bogus) {
c87e1876 916 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
5fc44cd2 917 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
b25cae9a 918 }
919
920 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 921 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 922 if(sr.doLog()) {
5fc44cd2 923 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 924 }
925
926 pw.getHeader()->rcode=RCode::ServFail;
927 goto sendit;
928 } else {
929 if(sr.doLog()) {
5fc44cd2 930 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 931 }
932 }
933 }
934 }
935 catch(ImmediateServFailException &e) {
936 if(g_logCommonErrors)
5fc44cd2 937 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 938 pw.getHeader()->rcode=RCode::ServFail;
939 goto sendit;
f3fe4ae6 940 }
b3f0ed10 941 }
942
c154c8a4 943 if(ret.size()) {
92476c8b 944 orderAndShuffle(ret);
ad42489c 945 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
3e61e7f7 946 sort(ret.begin(), ret.end(), *sl);
947 variableAnswer=true;
948 }
8e079f3a 949 }
950 if(haveEDNS) {
951 ret.push_back(makeOpt(edo.d_packetsize, 0, edo.d_Z));
952 }
0afa32d4
RG
953
954 bool needCommit = false;
8e079f3a 955 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
956 if( ! DNSSECOK &&
957 ( i->d_type == QType::NSEC3 ||
958 (
959 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
960 (
961 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
962 i->d_place != DNSResourceRecord::ANSWER
963 )
964 )
965 )
966 ) {
2e921ec6 967 continue;
3e80ebce
KM
968 }
969
8e079f3a 970 pw.startRecord(i->d_name, i->d_type, i->d_ttl, i->d_class, i->d_place);
971 if(i->d_type != QType::OPT) // their TTL ain't real
972 minTTL = min(minTTL, i->d_ttl);
973 i->d_content->toPacket(pw);
974 if(pw.size() > maxanswersize) {
975 pw.rollback();
976 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
add935a2 977 {
4957a608 978 pw.getHeader()->tc=1;
add935a2
PD
979 pw.truncate();
980 }
8e079f3a 981 goto sendit; // need to jump over pw.commit
982 }
0afa32d4 983 needCommit = true;
aa7929a3 984#ifdef HAVE_PROTOBUF
d9d3f9c1
RG
985 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
986 pbMessage.addRR(*i);
aa7929a3
RG
987 }
988#endif
ea634573 989 }
0afa32d4 990 if(needCommit)
8e079f3a 991 pw.commit();
288f4aa9 992 }
10321a98 993 sendit:;
b3f0ed10 994
79332bff 995 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
92011b8f 996 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
aa7929a3
RG
997#ifdef HAVE_PROTOBUF
998 if (luaconfsLocal->protobufServer) {
d9d3f9c1
RG
999 pbMessage.setBytes(packet.size());
1000 pbMessage.setResponseCode(pw.getHeader()->rcode);
1001 pbMessage.setAppliedPolicy(appliedPolicy);
1002 pbMessage.setPolicyTags(dc->d_policyTags);
58307a85 1003 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
02b47f43 1004 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
aa7929a3
RG
1005 }
1006#endif
ea634573 1007 if(!dc->d_tcp) {
b71b60ee 1008 struct msghdr msgh;
1009 struct iovec iov;
1010 char cbuf[256];
1011 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1012 msgh.msg_control=NULL;
1013
cbc03320 1014 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1015 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1016 }
cbc03320 1017 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1018 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
3762e821 1019 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
d9d3f9c1 1020 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_query,
76e2b9e3 1021 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1022 g_now.tv_sec,
76e2b9e3 1023 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1
RG
1024 min(minTTL,SyncRes::s_packetcachettl),
1025 &pbMessage);
1051f8a9 1026 }
3762e821 1027 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1028 }
9c495589
BH
1029 else {
1030 char buf[2];
ea634573
BH
1031 buf[0]=packet.size()/256;
1032 buf[1]=packet.size()%256;
feccc9fc 1033
c038218b 1034 Utility::iovec iov[2];
feccc9fc 1035
ea634573
BH
1036 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1037 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1038
c038218b 1039 int ret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1040 bool hadError=true;
feccc9fc 1041
3ddb9247 1042 if(ret == 0)
18af64a8 1043 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
3ddb9247 1044 else if(ret < 0 )
18af64a8 1045 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
ea634573 1046 else if((unsigned int)ret != 2 + packet.size())
18af64a8 1047 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
0e9d9ce2 1048 else
18af64a8 1049 hadError=false;
3ddb9247 1050
09e6702a 1051 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1052
09e6702a 1053 if(hadError) {
18af64a8 1054 // no need to remove us from FDM, we weren't there
c36bc97a 1055 dc->d_socket = -1;
09e6702a 1056 }
a6ae6414 1057 else {
cd989c87 1058 dc->d_tcpConnection->state=TCPConnection::BYTE0;
18af64a8 1059 Utility::gettimeofday(&g_now, 0); // needs to be updated
cd989c87
BH
1060 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1061 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
0e9d9ce2 1062 }
9c495589 1063 }
3ddb9247 1064
1d5b3ce6 1065 if(!g_quiet) {
461df9d2 1066 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 1067 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
9de3e034 1068 sr.d_totUsec/1000.0<<" ms, "<<
1069 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e 1070 }
b23b8614 1071
3ddb9247 1072 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
fe213470
BH
1073 float spent=makeFloat(sr.d_now-dc->d_now);
1074 if(spent < 0.001)
1075 g_stats.answers0_1++;
1076 else if(spent < 0.010)
1077 g_stats.answers1_10++;
1078 else if(spent < 0.1)
1079 g_stats.answers10_100++;
1080 else if(spent < 1.0)
1081 g_stats.answers100_1000++;
1082 else
1083 g_stats.answersSlow++;
1084
574af7ea 1085 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1086 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1087 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1088 // no worries, we do this for packet cache hits elsewhere
c6d04bdc 1089 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 1090 delete dc;
c36bc97a 1091 dc=0;
288f4aa9 1092 }
3f81d239 1093 catch(PDNSException &ae) {
a903b39c 1094 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 1095 delete dc;
288f4aa9 1096 }
7b1469bb 1097 catch(MOADNSException& e) {
a903b39c 1098 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 1099 delete dc;
7b1469bb 1100 }
fdbf35ac 1101 catch(std::exception& e) {
a903b39c 1102 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what()<<endl;
c36bc97a 1103 delete dc;
c154c8a4 1104 }
288f4aa9 1105 catch(...) {
a903b39c 1106 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1107 }
3ddb9247 1108
ec6eacbc 1109 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1110}
1111
677e2a46 1112void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1113{
2d733c0f 1114 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1115 if(processNum >= 0)
335da0ba 1116 sockname += "."+std::to_string(processNum);
677e2a46 1117 sockname+=".controlsocket";
41f7a068 1118 s_rcc.listen(sockname);
3ddb9247 1119
387de317
BH
1120 int sockowner = -1;
1121 int sockgroup = -1;
1122
1123 if (!::arg().isEmpty("socket-group"))
1124 sockgroup=::arg().asGid("socket-group");
1125 if (!::arg().isEmpty("socket-owner"))
1126 sockowner=::arg().asUid("socket-owner");
3ddb9247 1127
f838ad8d
BH
1128 if (sockgroup > -1 || sockowner > -1) {
1129 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1130 unixDie("Failed to chown control socket");
1131 }
1132 }
387de317
BH
1133
1134 // do mode change if socket-mode is given
1135 if(!::arg().isEmpty("socket-mode")) {
1136 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1137 if(chmod(sockname.c_str(), sockmode) < 0) {
1138 unixDie("Failed to chmod control socket");
1139 }
387de317 1140 }
1d5b3ce6
BH
1141}
1142
02b47f43
RG
1143static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, Netmask* ednssubnet)
1144{
1145 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1146 size_t questionLen = question.length();
1147 unsigned int consumed=0;
1148 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1149
1150 size_t pos= sizeof(dnsheader)+consumed+4;
1151 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1152 = 11 */
1153 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1154 /* OPT root label (1) followed by type (2) */
1155 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1156 char* ecsStart = nullptr;
1157 size_t ecsLen = 0;
1158 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1159 if (res == 0 && ecsLen > 4) {
1160 EDNSSubnetOpts eso;
1161 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1162 *ednssubnet=eso.source;
1163 }
1164 }
1165 }
1166 }
1167}
1168
d8f6d49f 1169void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1170{
cd989c87 1171 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1172
879b3f70 1173 if(conn->state==TCPConnection::BYTE0) {
b841314c 1174 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 1175 if(bytes==1)
667f7e60 1176 conn->state=TCPConnection::BYTE1;
3ddb9247 1177 if(bytes==2) {
a0aa4f64 1178 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
1179 conn->bytesread=0;
1180 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1181 }
1182 if(!bytes || bytes < 0) {
bb4bdbaf 1183 t_fdm->removeReadFD(fd);
09e6702a
BH
1184 return;
1185 }
1186 }
667f7e60 1187 else if(conn->state==TCPConnection::BYTE1) {
b841314c 1188 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 1189 if(bytes==1) {
667f7e60 1190 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1191 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 1192 conn->bytesread=0;
09e6702a
BH
1193 }
1194 if(!bytes || bytes < 0) {
1195 if(g_logCommonErrors)
cd989c87 1196 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 1197 t_fdm->removeReadFD(fd);
09e6702a
BH
1198 return;
1199 }
1200 }
667f7e60 1201 else if(conn->state==TCPConnection::GETQUESTION) {
b841314c 1202 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
f9d67b41 1203 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
cd989c87 1204 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 1205 t_fdm->removeReadFD(fd);
09e6702a
BH
1206 return;
1207 }
b841314c 1208 conn->bytesread+=(uint16_t)bytes;
667f7e60 1209 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1210 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1211
09e6702a
BH
1212 DNSComboWriter* dc=0;
1213 try {
cd989c87 1214 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
1215 }
1216 catch(MOADNSException &mde) {
3ddb9247 1217 g_stats.clientParseError++;
4957a608 1218 if(g_logCommonErrors)
cd989c87 1219 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 1220 return;
09e6702a 1221 }
cd989c87
BH
1222 dc->d_tcpConnection = conn; // carry the torch
1223 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1224 dc->d_tcp=true;
cd989c87 1225 dc->setRemote(&conn->d_remote);
a6147cd2 1226 ComboAddress dest;
1227 memset(&dest, 0, sizeof(dest));
1228 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1229 socklen_t len = dest.getSocklen();
1230 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1231 dc->setLocal(dest);
aa7929a3 1232#ifdef HAVE_PROTOBUF
02b47f43
RG
1233 auto luaconfsLocal = g_luaconfs.getLocal();
1234
1235 if(luaconfsLocal->protobufServer) {
1236 dc->d_uuid = (*t_uuidGenerator)();
1237
1238 try {
1239 DNSName qname;
1240 uint16_t qtype;
1241 uint16_t qclass;
1242 Netmask ednssubnet;
1243 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
1244
1245 getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &ednssubnet);
1246 dc->d_ednssubnet = ednssubnet;
1247
e1c8a4bb 1248 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dest, conn->d_remote, ednssubnet, true, dh->id, conn->qlen, qname, qtype, qclass, std::string(), std::vector<std::string>());
02b47f43
RG
1249 }
1250 catch(std::exception& e) {
1251 if(g_logCommonErrors)
1252 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1253 }
1254 }
aa7929a3 1255#endif
879b3f70 1256 if(dc->d_mdp.d_header.qr) {
4957a608 1257 delete dc;
048f5db6 1258 g_stats.ignoredCount++;
4328f463 1259 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
4957a608 1260 return;
879b3f70 1261 }
3abcdab2
PD
1262 if(dc->d_mdp.d_header.opcode) {
1263 delete dc;
048f5db6 1264 g_stats.ignoredCount++;
4328f463 1265 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
3abcdab2
PD
1266 return;
1267 }
09e6702a 1268 else {
4957a608
BH
1269 ++g_stats.qcounter;
1270 ++g_stats.tcpqcounter;
50a5ef72 1271 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1272 return;
09e6702a
BH
1273 }
1274 }
1275 }
1276}
1277
6dcd28c3 1278//! Handle new incoming TCP connection
d8f6d49f 1279void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1280{
37d3f960 1281 ComboAddress addr;
09e6702a 1282 socklen_t addrlen=sizeof(addr);
a683e8bd 1283 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1284 if(newsock>=0) {
85c32340
BH
1285 if(MT->numProcesses() > g_maxMThreads) {
1286 g_stats.overCapacityDrops++;
3897b9e1 1287 closesocket(newsock);
85c32340
BH
1288 return;
1289 }
1290
92011b8f 1291 if(t_remotes)
1292 t_remotes->push_back(addr);
49a699c4 1293 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1294 if(!g_quiet)
4957a608 1295 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1296
09e6702a 1297 g_stats.unauthorizedTCP++;
3897b9e1 1298 closesocket(newsock);
09e6702a
BH
1299 return;
1300 }
bd0289fc 1301 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1302 g_stats.tcpClientOverflow++;
3897b9e1 1303 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
09e6702a
BH
1304 return;
1305 }
3ddb9247 1306
3897b9e1 1307 setNonBlocking(newsock);
cd989c87
BH
1308 shared_ptr<TCPConnection> tc(new TCPConnection(newsock, addr));
1309 tc->state=TCPConnection::BYTE0;
3ddb9247 1310
cd989c87 1311 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1312
0bff046b 1313 struct timeval now;
c038218b 1314 Utility::gettimeofday(&now, 0);
cd989c87 1315 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1316 }
1317}
3ddb9247 1318
b71b60ee 1319string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1320{
183eb877 1321 gettimeofday(&g_now, 0);
b71b60ee 1322 struct timeval diff = g_now - tv;
1323 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1324
22cf1fda 1325 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1326 g_stats.tooOldDrops++;
1327 return 0;
1328 }
1329
1bc3c142 1330 ++g_stats.qcounter;
d7f10541
BH
1331 if(fromaddr.sin4.sin_family==AF_INET6)
1332 g_stats.ipv6qcounter++;
1bc3c142
BH
1333
1334 string response;
93f0da94 1335 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 1336 unsigned int ctag=0;
12aff2e5 1337 bool needECS = false;
02b47f43 1338 std::vector<std::string> policyTags;
12aff2e5 1339#ifdef HAVE_PROTOBUF
02b47f43 1340 boost::uuids::uuid uniqueId;
02b47f43
RG
1341 auto luaconfsLocal = g_luaconfs.getLocal();
1342 if (luaconfsLocal->protobufServer) {
1343 needECS = true;
1344 uniqueId = (*t_uuidGenerator)();
1345 }
12aff2e5 1346#endif
e824728a 1347 Netmask ednssubnet;
1bc3c142 1348 try {
02b47f43
RG
1349 DNSName qname;
1350 uint16_t qtype=0;
1351 uint16_t qclass=0;
1bc3c142 1352 uint32_t age;
8f7473d7 1353#ifdef MALLOC_TRACE
1354 /*
1355 static uint64_t last=0;
1356 if(!last)
1357 g_mtracer->clearAllocators();
1358 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1359 last=g_mtracer->getAllocs();
1360 cout<<g_mtracer->topAllocatorsString()<<endl;
1361 g_mtracer->clearAllocators();
1362 */
1363#endif
55a1378f 1364
12aff2e5 1365 if(needECS || (t_pdl->get() && (*t_pdl)->d_gettag)) {
b2eacd67 1366 try {
02b47f43 1367 getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet);
12aff2e5
RG
1368
1369 if(t_pdl->get() && (*t_pdl)->d_gettag) {
1370 try {
02b47f43 1371 ctag=(*t_pdl)->gettag(fromaddr, ednssubnet, destaddr, qname, qtype, &policyTags);
12aff2e5
RG
1372 }
1373 catch(std::exception& e) {
1374 if(g_logCommonErrors)
1375 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1376 }
8ea8c302 1377 }
b2eacd67 1378 }
1379 catch(std::exception& e)
1380 {
1381 if(g_logCommonErrors)
1382 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 1383 }
12ce523e 1384 }
3ddb9247 1385
02b47f43 1386 bool cacheHit = false;
d9d3f9c1 1387 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
02b47f43
RG
1388#ifdef HAVE_PROTOBUF
1389 if(luaconfsLocal->protobufServer) {
e1c8a4bb 1390 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet, false, dh->id, question.size(), qname, qtype, qclass, std::string(), policyTags);
d9d3f9c1
RG
1391 }
1392#endif /* HAVE_PROTOBUF */
02b47f43 1393
d9d3f9c1
RG
1394 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &pbMessage));
1395 if (cacheHit) {
1396#ifdef HAVE_PROTOBUF
1397 if(luaconfsLocal->protobufServer) {
e1c8a4bb
RG
1398 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1399 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1400 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
d9d3f9c1 1401 pbMessage.setEDNSSubnet(ednssubnet);
58307a85 1402 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
02b47f43
RG
1403 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1404 }
d9d3f9c1 1405#endif /* HAVE_PROTOBUF */
49a3500d 1406 if(!g_quiet)
1407 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
8f7473d7 1408
1bc3c142
BH
1409 g_stats.packetCacheHits++;
1410 SyncRes::s_queries++;
1411 ageDNSPacket(response, age);
b71b60ee 1412 struct msghdr msgh;
1413 struct iovec iov;
1414 char cbuf[256];
1415 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
1416 msgh.msg_control=NULL;
1417
cbc03320 1418 if(g_fromtosockets.count(fd)) {
fbe2a2e0 1419 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 1420 }
cbc03320 1421 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1422 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 1423
97bee66d
BH
1424 if(response.length() >= sizeof(struct dnsheader)) {
1425 struct dnsheader dh;
1426 memcpy(&dh, response.c_str(), sizeof(dh));
92011b8f 1427 updateResponseStats(dh.rcode, fromaddr, response.length(), 0, 0);
97bee66d 1428 }
08f3f638 1429 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1bc3c142
BH
1430 return 0;
1431 }
3ddb9247 1432 }
1bc3c142
BH
1433 catch(std::exception& e) {
1434 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1435 return 0;
1436 }
3ddb9247 1437
4ea94941 1438 if(t_pdl->get()) {
93f0da94 1439 if((*t_pdl)->ipfilter(fromaddr, destaddr, *dh)) {
4ea94941 1440 if(!g_quiet)
1441 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1442 g_stats.policyDrops++;
1443 return 0;
1444 }
1445 }
1446
1bc3c142 1447 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 1448 if(!g_quiet)
854d44e3 1449 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
461df9d2 1450
1bc3c142
BH
1451 g_stats.overCapacityDrops++;
1452 return 0;
1453 }
3ddb9247 1454
1bc3c142
BH
1455 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1456 dc->setSocket(fd);
49a3500d 1457 dc->d_tag=ctag;
1458 dc->d_query = question;
1bc3c142 1459 dc->setRemote(&fromaddr);
b71b60ee 1460 dc->setLocal(destaddr);
1bc3c142 1461 dc->d_tcp=false;
02b47f43 1462 dc->d_policyTags = policyTags;
aa7929a3 1463#ifdef HAVE_PROTOBUF
d9d3f9c1
RG
1464 if (luaconfsLocal->protobufServer) {
1465 dc->d_uuid = uniqueId;
1466 }
02b47f43 1467 dc->d_ednssubnet = ednssubnet;
aa7929a3
RG
1468#endif
1469
1bc3c142
BH
1470 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1471 return 0;
3ddb9247
PD
1472}
1473
b71b60ee 1474
d8f6d49f 1475void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 1476{
a683e8bd 1477 ssize_t len;
5db529f8
BH
1478 char data[1500];
1479 ComboAddress fromaddr;
b71b60ee 1480 struct msghdr msgh;
1481 struct iovec iov;
1482 char cbuf[256];
1483
1484 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1485 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1486
3ddb9247 1487 for(;;)
b71b60ee 1488 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
92011b8f 1489 if(t_remotes)
1490 t_remotes->push_back(fromaddr);
b23b8614 1491
49a699c4 1492 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 1493 if(!g_quiet)
4957a608 1494 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1495
5db529f8 1496 g_stats.unauthorizedUDP++;
a9af3782 1497 return;
5db529f8 1498 }
15c01deb 1499 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 1500 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 1501 if(!g_quiet)
81859ba5 1502 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1503
1504 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1505 return;
1506 }
5db529f8 1507 try {
b23b8614 1508 dnsheader* dh=(dnsheader*)data;
3ddb9247 1509
b23b8614 1510 if(dh->qr) {
048f5db6 1511 g_stats.ignoredCount++;
4957a608
BH
1512 if(g_logCommonErrors)
1513 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 1514 }
3abcdab2 1515 else if(dh->opcode) {
048f5db6 1516 g_stats.ignoredCount++;
3abcdab2
PD
1517 if(g_logCommonErrors)
1518 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1519 }
5db529f8 1520 else {
a683e8bd 1521 string question(data, (size_t)len);
b71b60ee 1522 struct timeval tv={0,0};
1523 HarvestTimestamp(&msgh, &tv);
1524 ComboAddress dest;
1525 memset(&dest, 0, sizeof(dest)); // this makes sure we igore this address if not returned by recvmsg above
a6147cd2 1526 auto loc = rplookup(g_listenSocketsAddresses, fd);
1527 if(HarvestDestinationAddress(&msgh, &dest)) {
1528 // but.. need to get port too
1529 if(loc)
1530 dest.sin4.sin_port = loc->sin4.sin_port;
1531 }
1532 else {
1533 if(loc) {
1534 dest = *loc;
1535 }
1536 else {
1537 dest.sin4.sin_family = fromaddr.sin4.sin_family;
a683e8bd
RG
1538 socklen_t slen = dest.getSocklen();
1539 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
a6147cd2 1540 }
1541 }
232f0877 1542 if(g_weDistributeQueries)
b71b60ee 1543 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
232f0877 1544 else
b71b60ee 1545 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
5db529f8
BH
1546 }
1547 }
1548 catch(MOADNSException& mde) {
3ddb9247 1549 g_stats.clientParseError++;
84e66a59 1550 if(g_logCommonErrors)
4957a608 1551 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 1552 }
0b602819
KM
1553 catch(std::runtime_error& e) {
1554 g_stats.clientParseError++;
1555 if(g_logCommonErrors)
1556 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
1557 }
1558 }
ac0e821b
BH
1559 else {
1560 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
3ddb9247 1561 if(errno == EAGAIN)
9326cae1 1562 g_stats.noPacketError++;
bf3b0cec 1563 break;
ac0e821b 1564 }
5db529f8
BH
1565}
1566
1bc3c142 1567
5db529f8
BH
1568typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
1569deferredAdd_t deferredAdd;
1570
f28307ad 1571void makeTCPServerSockets()
9c495589 1572{
37d3f960 1573 int fd;
f28307ad 1574 vector<string>locals;
2e3d8a19 1575 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1576
f28307ad 1577 if(locals.empty())
3f81d239 1578 throw PDNSException("No local address specified");
3ddb9247 1579
f28307ad 1580 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1581 ServiceTuple st;
1582 st.port=::arg().asNum("local-port");
1583 parseService(*i, st);
3ddb9247 1584
32252594
BH
1585 ComboAddress sin;
1586
f28307ad 1587 memset((char *)&sin,0, sizeof(sin));
37d3f960 1588 sin.sin4.sin_family = AF_INET;
32252594 1589 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1590 sin.sin6.sin6_family = AF_INET6;
f71bc087 1591 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1592 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1593 }
1594
1595 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 1596 if(fd<0)
3f81d239 1597 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 1598
3897b9e1 1599 setCloseOnExec(fd);
a903b39c 1600
f28307ad 1601 int tmp=1;
37d3f960 1602 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
f28307ad 1603 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1604 exit(1);
f28307ad 1605 }
0dfa94ab 1606 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1607 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1608 }
1609
c8ddb7c2 1610#ifdef TCP_DEFER_ACCEPT
37d3f960
BH
1611 if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
1612 if(i==locals.begin())
4957a608 1613 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1614 }
1615#endif
1616
fec7dd5a
SS
1617 if( ::arg().mustDo("non-local-bind") )
1618 Utility::setBindAny(AF_INET, fd);
1619
2332f42d 1620#ifdef SO_REUSEPORT
1621 if(::arg().mustDo("reuseport")) {
1622 int one=1;
1623 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
1624 throw PDNSException("SO_REUSEPORT: "+stringerror());
1625 }
1626#endif
1627
32252594 1628 sin.sin4.sin_port = htons(st.port);
a683e8bd 1629 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1630 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1631 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 1632
3897b9e1 1633 setNonBlocking(fd);
49a699c4 1634 setSocketSendBuffer(fd, 65000);
37d3f960 1635 listen(fd, 128);
5db529f8 1636 deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1637 g_tcpListenSockets.push_back(fd);
84433b79 1638 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1639 // - fd is not that which we know here, but returned from accept()
3ddb9247 1640 if(sin.sin4.sin_family == AF_INET)
32252594 1641 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1642 else
32252594 1643 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1644 }
9c495589
BH
1645}
1646
f28307ad 1647void makeUDPServerSockets()
288f4aa9 1648{
fec7dd5a 1649 int one=1;
f28307ad 1650 vector<string>locals;
2e3d8a19 1651 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1652
f28307ad 1653 if(locals.empty())
3f81d239 1654 throw PDNSException("No local address specified");
3ddb9247 1655
f28307ad 1656 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1657 ServiceTuple st;
1658 st.port=::arg().asNum("local-port");
1659 parseService(*i, st);
1660
37d3f960 1661 ComboAddress sin;
996c89cc 1662
37d3f960
BH
1663 memset(&sin, 0, sizeof(sin));
1664 sin.sin4.sin_family = AF_INET;
32252594 1665 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1666 sin.sin6.sin6_family = AF_INET6;
f71bc087 1667 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1668 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 1669 }
3ddb9247 1670
bb4bdbaf 1671 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 1672 if(fd < 0) {
3f81d239 1673 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1674 }
915b0c39
AT
1675 if (!setSocketTimestamps(fd))
1676 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 1677
b71b60ee 1678 if(IsAnyAddress(sin)) {
cbc03320 1679 if(sin.sin4.sin_family == AF_INET)
1680 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
1681 g_fromtosockets.insert(fd);
757d3179 1682#ifdef IPV6_RECVPKTINFO
cbc03320 1683 if(sin.sin4.sin_family == AF_INET6)
1684 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
1685 g_fromtosockets.insert(fd);
757d3179 1686#endif
0dfa94ab 1687 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
1688 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1689 }
b71b60ee 1690 }
fec7dd5a
SS
1691 if( ::arg().mustDo("non-local-bind") )
1692 Utility::setBindAny(AF_INET6, fd);
1693
3897b9e1 1694 setCloseOnExec(fd);
a903b39c 1695
4e9a20e6 1696 setSocketReceiveBuffer(fd, 250000);
32252594 1697 sin.sin4.sin_port = htons(st.port);
37d3f960 1698
2332f42d 1699
1700#ifdef SO_REUSEPORT
1701 if(::arg().mustDo("reuseport")) {
1702 int one=1;
1703 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
1704 throw PDNSException("SO_REUSEPORT: "+stringerror());
1705 }
1706#endif
a683e8bd 1707 socklen_t socklen=sin.getSocklen();
3ddb9247 1708 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 1709 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 1710
3897b9e1 1711 setNonBlocking(fd);
c2136bf0 1712
0aaecd50 1713 deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 1714 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 1715 if(sin.sin4.sin_family == AF_INET)
32252594 1716 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1717 else
32252594 1718 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1719 }
c836dc19 1720}
caa6eefa 1721
9c495589 1722
c836dc19
BH
1723void daemonize(void)
1724{
1725 if(fork())
1726 exit(0); // bye bye
3ddb9247
PD
1727
1728 setsid();
c836dc19 1729
27a5ead5 1730 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 1731 if(i < 0)
27a5ead5
BH
1732 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1733 else {
1734 dup2(i,0); /* stdin */
1735 dup2(i,1); /* stderr */
1736 dup2(i,2); /* stderr */
1737 close(i);
1738 }
288f4aa9 1739}
caa6eefa 1740
cc59bce6 1741AtomicCounter counter;
c75a6a9e
BH
// Set by usr1Handler(); doStats() reads it and clears it again.
bool statsWanted;

/** Signal handler: only records that a statistics dump was requested. */
void usr1Handler(int)
{
  statsWanted = true;
}
ae1b2e98 1748
9170fbaf
BH
1749void usr2Handler(int)
1750{
f1f34cc2 1751 g_quiet= !g_quiet;
1752 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
1753 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
1754}
1755
c75a6a9e
BH
1756void doStats(void)
1757{
16beeaa4
BH
1758 static time_t lastOutputTime;
1759 static uint64_t lastQueryCount;
d299d4f5 1760
1761 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
1762 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 1763
d299d4f5 1764 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
bd301954 1765 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
1766 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
1767 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
1768 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
1769
bd301954 1770 L<<Logger::Notice<<"stats: throttle map: "
3427fa8a 1771 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 1772 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
bd301954
JB
1773 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1774 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 1775 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
bd301954 1776 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 1777 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 1778
bd301954 1779 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 1780 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 1781
bd301954 1782 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 1783 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 1784
16beeaa4
BH
1785 time_t now = time(0);
1786 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
bd301954 1787 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
1788 }
1789 lastOutputTime = now;
1790 lastQueryCount = SyncRes::s_queries;
c75a6a9e 1791 }
3ddb9247 1792 else if(statsWanted)
bd301954 1793 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 1794
c75a6a9e
BH
1795 statsWanted=false;
1796}
c836dc19 1797
29f0b1ce 1798static void houseKeeping(void *)
c836dc19 1799{
d67620e4 1800 static __thread time_t last_stat, last_rootupdate, last_prune, last_secpoll;
8baca3fa 1801 static __thread int cleanCounter=0;
cc59bce6 1802 static __thread bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
1803 try {
1804 if(s_running)
1805 return;
1806 s_running=true;
3ddb9247 1807
cc59bce6 1808 struct timeval now;
1809 Utility::gettimeofday(&now, 0);
3ddb9247
PD
1810
1811 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 1812 DTime dt;
1813 dt.setTimeval(now);
1814 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
f8f243b0 1815 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numWorkerThreads);
3ddb9247 1816
f8f243b0 1817 pruneCollection(t_sstorage->negcache, ::arg().asNum("max-cache-entries") / (g_numWorkerThreads * 10), 200);
3ddb9247 1818
cc59bce6 1819 if(!((cleanCounter++)%40)) { // this is a full scan!
1820 time_t limit=now.tv_sec-300;
1821 for(SyncRes::nsspeeds_t::iterator i = t_sstorage->nsSpeeds.begin() ; i!= t_sstorage->nsSpeeds.end(); )
1822 if(i->second.stale(limit))
1823 t_sstorage->nsSpeeds.erase(i++);
1824 else
1825 ++i;
1826 }
1827 last_prune=time(0);
d67620e4 1828 }
3ddb9247 1829
cc59bce6 1830 if(now.tv_sec - last_rootupdate > 7200) {
1831 SyncRes sr(now);
1832 sr.setDoEDNS0(true);
e325f20c 1833 vector<DNSRecord> ret;
3ddb9247 1834
cc59bce6 1835 sr.setNoCache();
1836 int res=-1;
18b73338 1837 try {
6ed9a611 1838 res=sr.beginResolve(DNSName("."), QType(QType::NS), 1, ret);
cc59bce6 1839 }
3aa91c3e 1840 catch(PDNSException& e)
1841 {
1842 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
1843 }
1844
1845 catch(std::exception& e)
1846 {
1847 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
1848 }
1849
cc59bce6 1850 catch(...)
1851 {
1852 L<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
1853 }
1854 if(!res) {
1855 L<<Logger::Notice<<"Refreshed . records"<<endl;
1856 last_rootupdate=now.tv_sec;
1857 }
1858 else
1859 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1860 }
3ddb9247 1861
cc59bce6 1862 if(!t_id) {
3ddb9247 1863 if(now.tv_sec - last_stat >= 1800) {
cc59bce6 1864 doStats();
1865 last_stat=time(0);
1866 }
3ddb9247 1867
cc59bce6 1868 if(now.tv_sec - last_secpoll >= 3600) {
1869 try {
1870 doSecPoll(&last_secpoll);
1871 }
1872 catch(...) {}
18b73338 1873 }
d67620e4 1874 }
cc59bce6 1875 s_running=false;
d67620e4 1876 }
cc59bce6 1877 catch(PDNSException& ae)
1878 {
1879 s_running=false;
1880 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
1881 throw;
1882 }
779828c4 1883}
d6d5dea7 1884
49a699c4
BH
1885void makeThreadPipes()
1886{
c3828c03 1887 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
1888 struct ThreadPipeSet tps;
1889 int fd[2];
1890 if(pipe(fd) < 0)
1891 unixDie("Creating pipe for inter-thread communications");
3ddb9247 1892
49a699c4
BH
1893 tps.readToThread = fd[0];
1894 tps.writeToThread = fd[1];
3ddb9247 1895
49a699c4
BH
1896 if(pipe(fd) < 0)
1897 unixDie("Creating pipe for inter-thread communications");
1898 tps.readFromThread = fd[0];
1899 tps.writeFromThread = fd[1];
3ddb9247 1900
49a699c4
BH
1901 g_pipes.push_back(tps);
1902 }
1903}
1904
00c9b8c1
BH
1905struct ThreadMSG
1906{
1907 pipefunc_t func;
1908 bool wantAnswer;
1909};
1910
49a699c4
BH
1911void broadcastFunction(const pipefunc_t& func, bool skipSelf)
1912{
49a699c4 1913 unsigned int n = 0;
1dc8f4d0 1914 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
1915 {
1916 if(n++ == t_id) {
1917 if(!skipSelf)
1918 func(); // don't write to ourselves!
1919 continue;
1920 }
3ddb9247 1921
00c9b8c1
BH
1922 ThreadMSG* tmsg = new ThreadMSG();
1923 tmsg->func = func;
1924 tmsg->wantAnswer = true;
b841314c
RG
1925 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
1926 delete tmsg;
49a699c4 1927 unixDie("write to thread pipe returned wrong size or error");
b841314c 1928 }
3ddb9247 1929
49a699c4
BH
1930 string* resp;
1931 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1932 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 1933
49a699c4
BH
1934 if(resp) {
1935// cerr <<"got response: " << *resp << endl;
1936 delete resp;
1937 }
1938 }
1939}
06ea9015 1940
2fafb640 1941static uint32_t g_disthashseed;
8171ab83 1942void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 1943{
8171ab83 1944 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 1945 unsigned int target = 1 + (hash % (g_pipes.size()-1));
1946
00c9b8c1
BH
1947 if(target == t_id) {
1948 func();
1949 return;
1950 }
3ddb9247 1951 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
1952 ThreadMSG* tmsg = new ThreadMSG();
1953 tmsg->func = func;
1954 tmsg->wantAnswer = false;
3ddb9247 1955
b841314c
RG
1956 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
1957 delete tmsg;
3ddb9247 1958 unixDie("write to thread pipe returned wrong size or error");
b841314c 1959 }
00c9b8c1 1960}
3427fa8a 1961
49a699c4
BH
1962void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
1963{
00c9b8c1 1964 ThreadMSG* tmsg;
3ddb9247
PD
1965
1966 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
1967 unixDie("read from thread pipe returned wrong size or error");
1968 }
3ddb9247 1969
2f22827a 1970 void *resp=0;
1971 try {
1972 resp = tmsg->func();
1973 }
1974 catch(std::exception& e) {
6d2010a8 1975 if(g_logCommonErrors)
1976 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 1977 }
1978 catch(PDNSException& e) {
6d2010a8 1979 if(g_logCommonErrors)
1980 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 1981 }
00c9b8c1
BH
1982 if(tmsg->wantAnswer)
1983 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp))
1984 unixDie("write to thread pipe returned wrong size or error");
3ddb9247 1985
00c9b8c1 1986 delete tmsg;
49a699c4 1987}
09e6702a 1988
13034931
BH
1989template<class T> void *voider(const boost::function<T*()>& func)
1990{
1991 return func();
1992}
1993
b3b5459d
BH
1994vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
1995{
1996 a.insert(a.end(), b.begin(), b.end());
1997 return a;
1998}
1999
// Appends all (string, uint16_t) pairs of b to a and returns a.
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2005
3ddb9247
PD
2006vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2007{
2008 a.insert(a.end(), b.begin(), b.end());
2009 return a;
2010}
2011
92011b8f 2012
13034931 2013template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
2014{
2015 unsigned int n = 0;
2016 T ret=T();
1dc8f4d0 2017 for(ThreadPipeSet& tps : g_pipes)
3427fa8a
BH
2018 {
2019 if(n++ == t_id) {
2020 if(!skipSelf) {
2021 T* resp = (T*)func(); // don't write to ourselves!
2022 if(resp) {
2023 //~ cerr <<"got direct: " << *resp << endl;
2024 ret += *resp;
2025 delete resp;
2026 }
2027 }
2028 continue;
2029 }
3ddb9247 2030
00c9b8c1
BH
2031 ThreadMSG* tmsg = new ThreadMSG();
2032 tmsg->func = boost::bind(voider<T>, func);
2033 tmsg->wantAnswer = true;
3ddb9247 2034
b841314c
RG
2035 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2036 delete tmsg;
3427fa8a 2037 unixDie("write to thread pipe returned wrong size or error");
b841314c 2038 }
3ddb9247 2039
3427fa8a
BH
2040 T* resp;
2041 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2042 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2043
3427fa8a
BH
2044 if(resp) {
2045 //~ cerr <<"got response: " << *resp << endl;
2046 ret += *resp;
2047 delete resp;
2048 }
2049 }
2050 return ret;
2051}
2052
13034931
BH
2053template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2054template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
b3b5459d 2055template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
3ddb9247 2056template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 2057
d8f6d49f 2058void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2059{
2060 string remote;
2061 string msg=s_rcc.recv(&remote);
2062 RecursorControlParser rcp;
2063 RecursorControlParser::func_t* command;
3ddb9247 2064
09e6702a 2065 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2066
2067 // If we are inside a chroot, we need to strip
2068 if (!arg()["chroot"].empty()) {
a683e8bd 2069 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2070 remote = remote.substr(len);
2071 }
2072
ab5c053d
BH
2073 try {
2074 s_rcc.send(answer, &remote);
2075 command();
2076 }
fdbf35ac 2077 catch(std::exception& e) {
ab5c053d
BH
2078 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2079 }
3f81d239 2080 catch(PDNSException& ae) {
ab5c053d
BH
2081 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2082 }
09e6702a
BH
2083}
2084
d8f6d49f 2085void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2086{
0b18b22e 2087 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2088 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2089
667f7e60 2090 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2091
a683e8bd 2092 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2093 if(ret > 0) {
667f7e60 2094 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2095 pident->inNeeded-=(size_t)ret;
825fa717 2096 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2097 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2098 PacketID pid=*pident;
2099 string msg=pident->inMSG;
3ddb9247 2100
bb4bdbaf 2101 t_fdm->removeReadFD(fd);
3ddb9247 2102 MT->sendEvent(pid, &msg);
09e6702a
BH
2103 }
2104 else {
667f7e60 2105 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2106 }
2107 }
2108 else {
667f7e60 2109 PacketID tmp=*pident;
bb4bdbaf 2110 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2111 string empty;
2112 MT->sendEvent(tmp, &empty); // this conveys error status
2113 }
2114}
2115
d8f6d49f 2116void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2117{
0b18b22e 2118 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 2119 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 2120 if(ret > 0) {
a683e8bd 2121 pid->outPos+=(ssize_t)ret;
667f7e60
BH
2122 if(pid->outPos==pid->outMSG.size()) {
2123 PacketID tmp=*pid;
bb4bdbaf 2124 t_fdm->removeWriteFD(fd);
09e6702a
BH
2125 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2126 }
2127 }
2128 else { // error or EOF
667f7e60 2129 PacketID tmp(*pid);
bb4bdbaf 2130 t_fdm->removeWriteFD(fd);
09e6702a 2131 string sent;
998a4334 2132 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
2133 }
2134}
2135
34801ab1
BH
2136// resend event to everybody chained onto it
2137void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2138{
2139 if(iter->key.chain.empty())
2140 return;
e27e91a8 2141 // cerr<<"doResends called!\n";
34801ab1
BH
2142 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2143 resend.fd=-1;
2144 resend.id=*i;
e27e91a8 2145 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 2146
34801ab1
BH
2147 MT->sendEvent(resend, &content);
2148 g_stats.chainResends++;
34801ab1
BH
2149 }
2150}
2151
d8f6d49f 2152void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2153{
600fc20b 2154 PacketID pid=any_cast<PacketID>(var);
a683e8bd 2155 ssize_t len;
e45beeda 2156 char data[g_outgoingEDNSBufsize];
996c89cc 2157 ComboAddress fromaddr;
09e6702a
BH
2158 socklen_t addrlen=sizeof(fromaddr);
2159
998a4334 2160 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 2161
a683e8bd 2162 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 2163 if(len < 0)
996c89cc 2164 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 2165 else {
3ddb9247 2166 g_stats.serverParseError++;
09e6702a 2167 if(g_logCommonErrors)
85db02c5 2168 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 2169 ": packet smaller than DNS header"<<endl;
998a4334 2170 }
34801ab1 2171
49a699c4 2172 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
2173 string empty;
2174
2175 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 2176 if(iter != MT->d_waiters.end())
34801ab1 2177 doResends(iter, pid, empty);
3ddb9247 2178
34801ab1 2179 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 2180 return;
3ddb9247 2181 }
998a4334
BH
2182
2183 dnsheader dh;
2184 memcpy(&dh, data, sizeof(dh));
3ddb9247 2185
6da3b3ad
PD
2186 PacketID pident;
2187 pident.remote=fromaddr;
2188 pident.id=dh.id;
2189 pident.fd=fd;
34801ab1 2190
33a928af 2191 if(!dh.qr && g_logCommonErrors) {
854d44e3 2192 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
2193 }
2194
2195 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2196 !dh.qr) { // one weird server
2197 pident.domain.clear();
2198 pident.type = 0;
2199 }
2200 else {
2201 try {
0b31e67e 2202 if(len > 12)
2203 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
2204 }
2205 catch(std::exception& e) {
2206 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
0b31e67e 2207 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 2208 return;
34801ab1 2209 }
6da3b3ad
PD
2210 }
2211 string packet;
2212 packet.assign(data, len);
34801ab1 2213
6da3b3ad
PD
2214 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2215 if(iter != MT->d_waiters.end()) {
2216 doResends(iter, pident, packet);
2217 }
c1da7976 2218
6da3b3ad 2219retryWithName:
4957a608 2220
6da3b3ad
PD
2221 if(!MT->sendEvent(pident, &packet)) {
2222 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2223 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2224 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 2225 pident.domain == mthread->key.domain) {
6da3b3ad 2226 mthread->key.nearMisses++;
998a4334 2227 }
6da3b3ad
PD
2228
2229 // be a bit paranoid here since we're weakening our matching
3ddb9247 2230 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
2231 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2232 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2233 pident.domain = mthread->key.domain;
2234 pident.type = mthread->key.type;
2235 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 2236 }
09e6702a 2237 }
6da3b3ad
PD
2238 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2239 if(g_logCommonErrors) {
8a464ee3 2240 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 2241 }
09e6702a 2242 }
6da3b3ad
PD
2243 else if(fd >= 0) {
2244 t_udpclientsocks->returnSocket(fd);
2245 }
09e6702a
BH
2246}
2247
1f4abb20
BH
2248FDMultiplexer* getMultiplexer()
2249{
2250 FDMultiplexer* ret;
2251 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
2252 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
2253 try {
2254 ret=i->second();
1f4abb20
BH
2255 return ret;
2256 }
98d0ee4a 2257 catch(FDMultiplexerException &fe) {
0a7f24cb 2258 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
2259 }
2260 catch(...) {
2261 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2262 }
1f4abb20
BH
2263 }
2264 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2265 exit(1);
2266}
2267
3ddb9247 2268
0f39c1a3 2269string* doReloadLuaScript()
4485aa35 2270{
674cf0f6 2271 string fname= ::arg()["lua-dns-script"];
4485aa35 2272 try {
674cf0f6
BH
2273 if(fname.empty()) {
2274 t_pdl->reset();
2275 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 2276 return new string("unloaded\n");
4485aa35
BH
2277 }
2278 else {
a3e7b735 2279 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(fname));
4485aa35
BH
2280 }
2281 }
fdbf35ac 2282 catch(std::exception& e) {
674cf0f6 2283 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 2284 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 2285 }
3ddb9247 2286
674cf0f6 2287 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 2288 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
2289}
2290
49a699c4
BH
2291string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2292{
3ddb9247 2293 if(begin != end)
49a699c4 2294 ::arg().set("lua-dns-script") = *begin;
3ddb9247 2295
0f39c1a3 2296 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 2297}
49a699c4 2298
77499b05
BH
2299string* pleaseUseNewTraceRegex(const std::string& newRegex)
2300try
2301{
2302 if(newRegex.empty()) {
2303 t_traceRegex->reset();
2304 return new string("unset\n");
2305 }
2306 else {
2307 (*t_traceRegex) = shared_ptr<Regex>(new Regex(newRegex));
2308 return new string("ok\n");
2309 }
2310}
3f81d239 2311catch(PDNSException& ae)
77499b05
BH
2312{
2313 return new string(ae.reason+"\n");
2314}
2315
2316string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2317{
2318 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2319}
2320
4e9a20e6 2321static void checkLinuxIPv6Limits()
2322{
2323#ifdef __linux__
2324 string line;
2325 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 2326 int lim=std::stoi(line);
4e9a20e6 2327 if(lim < 16384) {
36849ff2 2328 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 2329 }
2330 }
2331#endif
2332}
36849ff2 2333static void checkOrFixFDS()
4e9a20e6 2334{
c0063e60 2335 unsigned int availFDs=getFilenumLimit();
2336 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2337
2338 if(wantFDs > availFDs) {
067ad20e 2339 unsigned int hardlimit= getFilenumLimit(true);
2340 if(hardlimit >= wantFDs) {
c0063e60 2341 setFilenumLimit(wantFDs);
2342 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 2343 }
2344 else {
067ad20e 2345 int newval = (hardlimit - 25) / g_numWorkerThreads;
2346 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 2347 g_maxMThreads = newval;
067ad20e 2348 setFilenumLimit(hardlimit);
36849ff2 2349 }
2350 }
4e9a20e6 2351}
77499b05 2352
bb4bdbaf 2353void* recursorThread(void*);
51e2144e 2354
3427fa8a 2355void* pleaseSupplantACLs(NetmaskGroup *ng)
49a699c4
BH
2356{
2357 t_allowFrom = ng;
3427fa8a 2358 return 0;
49a699c4
BH
2359}
2360
dbd23fc2
BH
// Command-line arguments; parseACLs() passes them to preParse() when the
// configuration is re-read.
int g_argc;
char** g_argv;
2363
18af64a8 2364void parseACLs()
f7c1d4e3 2365{
18af64a8 2366 static bool l_initialized;
3ddb9247 2367
49a699c4 2368 if(l_initialized) { // only reload configuration file on second call
18af64a8
BH
2369 string configname=::arg()["config-dir"]+"/recursor.conf";
2370 cleanSlashes(configname);
3ddb9247
PD
2371
2372 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 2373 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 2374 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 2375 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
2376 ::arg().preParse(g_argc, g_argv, "include-dir");
2377
2378 // then process includes
2379 std::vector<std::string> extraConfigs;
242b90e1
AT
2380 ::arg().gatherIncludes(extraConfigs);
2381
1dc8f4d0 2382 for(const std::string& fn : extraConfigs) {
7e818521 2383 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2384 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2385 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2386 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2387 }
ca2c884c
AT
2388
2389 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2390 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2391 }
49a699c4
BH
2392
2393 NetmaskGroup* oldAllowFrom = t_allowFrom, *allowFrom=new NetmaskGroup;
3ddb9247 2394
2c95fc65
BH
2395 if(!::arg()["allow-from-file"].empty()) {
2396 string line;
2c95fc65
BH
2397 ifstream ifs(::arg()["allow-from-file"].c_str());
2398 if(!ifs) {
3ddb9247 2399 delete allowFrom;
9c61b9d0 2400 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2401 }
2402
2403 string::size_type pos;
2404 while(getline(ifs,line)) {
2405 pos=line.find('#');
2406 if(pos!=string::npos)
2407 line.resize(pos);
2408 trim(line);
2409 if(line.empty())
2410 continue;
2411
18af64a8 2412 allowFrom->addMask(line);
2c95fc65 2413 }
49a699c4 2414 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2415 }
2416 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2417 vector<string> ips;
2418 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2419
f7c1d4e3
BH
2420 L<<Logger::Warning<<"Only allowing queries from: ";
2421 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2422 allowFrom->addMask(*i);
f7c1d4e3 2423 if(i!=ips.begin())
674cf0f6 2424 L<<Logger::Warning<<", ";
f7c1d4e3
BH
2425 L<<Logger::Warning<<*i;
2426 }
2427 L<<Logger::Warning<<endl;
2428 }
49a699c4 2429 else {
3ddb9247 2430 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
49a699c4
BH
2431 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2432 delete allowFrom;
2433 allowFrom = 0;
2434 }
3ddb9247 2435
49a699c4 2436 g_initialAllowFrom = allowFrom;
d7dae798 2437 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
49a699c4 2438 delete oldAllowFrom;
3ddb9247 2439
49a699c4 2440 l_initialized = true;
18af64a8
BH
2441}
2442
795215f2 2443boost::optional<Netmask> getEDNSSubnetMask(const ComboAddress& local, const DNSName&dn, const ComboAddress& rem)
2444{
2445 if(local.sin4.sin_family != AF_INET || local.sin4.sin_addr.s_addr) { // detect unset 'requestor'
2446 if(g_ednsdomains.check(dn) || g_ednssubnets.match(rem)) {
1353273b 2447 int bits =local.sin4.sin_family == AF_INET ? 24 : 56;
795215f2 2448 ComboAddress trunc(local);
2449 trunc.truncate(bits);
2450 return boost::optional<Netmask>(Netmask(trunc, bits));
2451 }
2452 }
2453 return boost::optional<Netmask>();
2454}
2455
2456void parseEDNSSubnetWhitelist(const std::string& wlist)
2457{
2458 vector<string> parts;
39588f55 2459 stringtok(parts, wlist, ",; ");
795215f2 2460 for(const auto& a : parts) {
2461 try {
2462 Netmask nm(a);
2463 g_ednssubnets.addMask(nm);
2464 }
2465 catch(...) {
2466 g_ednsdomains.add(DNSName(a));
2467 }
2468 }
2469}
2470
756e82cf 2471SuffixMatchNode g_delegationOnly;
2472static void setupDelegationOnly()
2473{
2474 vector<string> parts;
2475 stringtok(parts, ::arg()["delegation-only"], ", \t");
2476 for(const auto& p : parts) {
2477 g_delegationOnly.add(DNSName(p));
2478 }
2479}
795215f2 2480
18af64a8
BH
2481int serviceMain(int argc, char*argv[])
2482{
5124de27 2483 L.setName(s_programname);
18af64a8 2484 L.setLoglevel((Logger::Urgency)(6)); // info and up
b6cfa948 2485 L.disableSyslog(::arg().mustDo("disable-syslog"));
18af64a8
BH
2486
2487 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
2488 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2489 if(val >= 0)
2490 theL().setFacility(val);
18af64a8
BH
2491 else
2492 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2493 }
2494
ba1a571d 2495 showProductVersion();
18af64a8 2496 seedRandom(::arg()["entropy-source"]);
3afde9b2 2497
06ea9015 2498 g_disthashseed=dns_random(0xffffffff);
2499
ad42489c 2500 loadRecursorLuaConfig(::arg()["lua-config-file"]);
2501
18af64a8 2502 parseACLs();
92011b8f 2503 sortPublicSuffixList();
2504
eb5bae86
BH
2505 if(!::arg()["dont-query"].empty()) {
2506 g_dontQuery=new NetmaskGroup;
2507 vector<string> ips;
2508 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
2509 ips.push_back("0.0.0.0");
2510 ips.push_back("::");
c36bc97a 2511
eb5bae86
BH
2512 L<<Logger::Warning<<"Will not send queries to: ";
2513 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2514 g_dontQuery->addMask(*i);
2515 if(i!=ips.begin())
4957a608 2516 L<<Logger::Warning<<", ";
eb5bae86
BH
2517 L<<Logger::Warning<<*i;
2518 }
2519 L<<Logger::Warning<<endl;
2520 }
2521
f7c1d4e3 2522 g_quiet=::arg().mustDo("quiet");
3ddb9247 2523
1bc3c142
BH
2524 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2525 if(g_weDistributeQueries) {
2526 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2527 }
3ddb9247 2528
756e82cf 2529 setupDelegationOnly();
b33c2462 2530 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 2531
ea9ddf87 2532 if(::arg()["dnssec"]=="off")
12ce523e 2533 g_dnssecmode=DNSSECMode::Off;
a6415142
PL
2534 else if(::arg()["dnssec"]=="process-no-validate")
2535 g_dnssecmode=DNSSECMode::ProcessNoValidate;
ea9ddf87 2536 else if(::arg()["dnssec"]=="process")
12ce523e 2537 g_dnssecmode=DNSSECMode::Process;
2538 else if(::arg()["dnssec"]=="validate")
ea9ddf87 2539 g_dnssecmode=DNSSECMode::ValidateAll;
12ce523e 2540 else if(::arg()["dnssec"]=="log-fail")
2541 g_dnssecmode=DNSSECMode::ValidateForLog;
2542 else {
2543 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2544 exit(1);
2545 }
2546
c87e1876
PL
2547 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2548
77499b05
BH
2549 if(::arg()["trace"]=="fail") {
2550 SyncRes::setDefaultLogMode(SyncRes::Store);
2551 }
2552 else if(::arg().mustDo("trace")) {
2553 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
2554 ::arg().set("quiet")="no";
2555 g_quiet=false;
3e9c6c0a 2556 g_dnssecLOG=true;
f7c1d4e3 2557 }
3ddb9247 2558
aadceba8 2559 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2560
4e9a20e6 2561 checkLinuxIPv6Limits();
5a38281c 2562 try {
3ddb9247 2563 vector<string> addrs;
5a38281c
BH
2564 if(!::arg()["query-local-address6"].empty()) {
2565 SyncRes::s_doIPv6=true;
d4fb76e9 2566 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3ddb9247 2567
5a38281c 2568 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
1dc8f4d0 2569 for(const string& addr : addrs) {
4957a608 2570 g_localQueryAddresses6.push_back(ComboAddress(addr));
5a38281c
BH
2571 }
2572 }
d4fb76e9
BH
2573 else {
2574 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2575 }
5a38281c
BH
2576 addrs.clear();
2577 stringtok(addrs, ::arg()["query-local-address"], ", ;");
1dc8f4d0 2578 for(const string& addr : addrs) {
5a38281c
BH
2579 g_localQueryAddresses4.push_back(ComboAddress(addr));
2580 }
2581 }
2582 catch(std::exception& e) {
2583 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2584 exit(99);
f7c1d4e3 2585 }
f555e92e 2586
1051f8a9
BH
2587 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2588
f7c1d4e3 2589 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
c3e753c7 2590 SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
1051f8a9 2591 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
2592 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2593 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2594 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
2595 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2596 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 2597 SyncRes::s_serverID=::arg()["server-id"];
173d790e 2598 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 2599 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
01402d56 2600 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
2601 if(SyncRes::s_serverID.empty()) {
2602 char tmp[128];
2603 gethostname(tmp, sizeof(tmp)-1);
2604 SyncRes::s_serverID=tmp;
2605 }
3ddb9247 2606
5b0ddd18 2607 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 2608
49a699c4 2609 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 2610
08f3f638 2611 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 2612
f7c1d4e3 2613 g_logCommonErrors=::arg().mustDo("log-common-errors");
e661a20b
PD
2614
2615 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
2616 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2617
b3adda56
PD
2618 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
2619
f7c1d4e3
BH
2620 makeUDPServerSockets();
2621 makeTCPServerSockets();
815099b2 2622
376effcf 2623 parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
2624
677e2a46
BH
2625 int forks;
2626 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
2627 if(!fork()) // we are child
2628 break;
2629 }
3ddb9247 2630
f7c1d4e3
BH
2631 if(::arg().mustDo("daemon")) {
2632 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
2633 L.toConsole(Logger::Critical);
f7c1d4e3
BH
2634 daemonize();
2635 }
2636 signal(SIGUSR1,usr1Handler);
2637 signal(SIGUSR2,usr2Handler);
2638 signal(SIGPIPE,SIG_IGN);
a6414fdc 2639 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
c0063e60 2640 g_numWorkerThreads = ::arg().asNum("threads");
a6414fdc
AT
2641 g_maxMThreads = ::arg().asNum("max-mthreads");
2642 checkOrFixFDS();
3ddb9247 2643
3afde9b2
PL
2644 openssl_thread_setup();
2645 openssl_seed();
2646
138435cb
BH
2647 int newgid=0;
2648 if(!::arg()["setgid"].empty())
2649 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
2650 int newuid=0;
2651 if(!::arg()["setuid"].empty())
2652 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
2653
f1d6a7ce
KM
2654 Utility::dropGroupPrivs(newuid, newgid);
2655
138435cb
BH
2656 if (!::arg()["chroot"].empty()) {
2657 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
2658 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
2659 exit(1);
2660 }
f0f3f0b0
PL
2661 else
2662 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
2663 }
2664
f0f3f0b0
PL
2665 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
2666 if(!s_pidfname.empty())
2667 unlink(s_pidfname.c_str()); // remove possible old pid file
2668 writePid();
2669
2670 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
2671
f1d6a7ce 2672 Utility::dropUserPrivs(newuid);
c0063e60 2673
49a699c4 2674 makeThreadPipes();
3ddb9247 2675
5d4dd7fe
BH
2676 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2677 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
343257a4 2678
c3828c03 2679 if(g_numThreads == 1) {
76698c6e 2680 L<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
2681#ifdef HAVE_SYSTEMD
2682 sd_notify(0, "READY=1");
2683#endif
76698c6e
BH
2684 recursorThread(0);
2685 }
2686 else {
2687 pthread_t tid;
c3828c03
BH
2688 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
2689 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 2690 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
76698c6e
BH
2691 }
2692 void* res;
6b6720de
PL
2693#ifdef HAVE_SYSTEMD
2694 sd_notify(0, "READY=1");
2695#endif
76698c6e 2696 pthread_join(tid, &res);
bb4bdbaf 2697 }
bb4bdbaf
BH
2698 return 0;
2699}
2700
2701void* recursorThread(void* ptr)
2702try
2703{
2e2cd8ec 2704 t_id=(int) (long) ptr;
49a699c4 2705 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
ac0e821b 2706 t_sstorage->domainmap = g_initialDomainMap;
49a699c4
BH
2707 t_allowFrom = g_initialAllowFrom;
2708 t_udpclientsocks = new UDPClientSocks();
bd0289fc 2709 t_tcpClientCounts = new tcpClientCounts_t();
49a699c4 2710 primeHints();
3ddb9247 2711
49a699c4 2712 t_packetCache = new RecursorPacketCache();
3ddb9247 2713
aa7929a3
RG
2714#ifdef HAVE_PROTOBUF
2715 t_uuidGenerator = new boost::uuids::random_generator();
2716#endif
49a699c4 2717 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 2718
a3e7b735 2719 t_pdl = new shared_ptr<RecursorLua4>();
3ddb9247 2720
674cf0f6
BH
2721 try {
2722 if(!::arg()["lua-dns-script"].empty()) {
a3e7b735 2723 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(::arg()["lua-dns-script"]));
674cf0f6
BH
2724 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
2725 }
674cf0f6
BH
2726 }
2727 catch(std::exception &e) {
2728 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 2729 _exit(99);
674cf0f6 2730 }
3ddb9247 2731
77499b05 2732 t_traceRegex = new shared_ptr<Regex>();
f8f243b0 2733 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 2734 if(ringsize) {
60c8afa8 2735 t_remotes = new addrringbuf_t();
f8f243b0 2736 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 2737 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 2738 else
3ddb9247 2739 t_remotes->set_capacity(ringsize);
60c8afa8 2740 t_servfailremotes = new addrringbuf_t();
3ddb9247 2741 t_servfailremotes->set_capacity(ringsize);
60c8afa8 2742 t_largeanswerremotes = new addrringbuf_t();
3ddb9247 2743 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 2744
c5c066bf 2745 t_queryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2746 t_queryring->set_capacity(ringsize);
c5c066bf 2747 t_servfailqueryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2748 t_servfailqueryring->set_capacity(ringsize);
92011b8f 2749 }
3ddb9247 2750
bb4bdbaf 2751 MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
3ddb9247 2752
bb4bdbaf
BH
2753 PacketID pident;
2754
2755 t_fdm=getMultiplexer();
f3d1d67b 2756 if(!t_id) {
d07bf7ff 2757 if(::arg().mustDo("webserver")) {
30a1aa92 2758 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 2759 try {
1ce57618 2760 new RecursorWebServer(t_fdm);
8989097d
CH
2761 }
2762 catch(PDNSException &e) {
2763 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
2764 exit(99);
2765 }
f3d1d67b 2766 }
83252304 2767 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 2768 }
83252304 2769
49a699c4 2770 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 2771
1bc3c142 2772 if(!g_weDistributeQueries || !t_id) // if we distribute queries, only t_id = 0 listens
3ddb9247 2773 for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i)
1bc3c142 2774 t_fdm->addReadFD(i->first, i->second);
3ddb9247 2775
674cf0f6 2776 if(!t_id) {
674cf0f6
BH
2777 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
2778 }
1bc3c142 2779
f7c1d4e3 2780 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 2781
f7c1d4e3 2782 bool listenOnTCP(true);
49a699c4 2783
2c78bd57 2784 time_t last_carbon=0;
2785 time_t carbonInterval=::arg().asNum("carbon-interval");
cc59bce6 2786 counter=AtomicCounter(0); // used to periodically execute certain tasks
f7c1d4e3 2787 for(;;) {
ac0e821b 2788 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 2789
3427fa8a
BH
2790 if(!(counter%500)) {
2791 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
2792 }
2793
d2392145 2794 if(!(counter%55)) {
d8f6d49f 2795 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 2796 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 2797
f7c1d4e3 2798 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 2799 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 2800 if(g_logCommonErrors)
cd989c87 2801 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 2802 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
2803 }
2804 }
3ddb9247 2805
f7c1d4e3
BH
2806 counter++;
2807
3427fa8a 2808 if(!t_id && statsWanted) {
f7c1d4e3
BH
2809 doStats();
2810 }
2811
2812 Utility::gettimeofday(&g_now, 0);
2c78bd57 2813
2814 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
2815 MT->makeThread(doCarbonDump, 0);
2816 last_carbon = g_now.tv_sec;
2817 }
2818
bb4bdbaf 2819 t_fdm->run(&g_now);
3ea54bf0 2820 // 'run' updates g_now for us
f7c1d4e3 2821
b8ef5c5c 2822 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 2823 if(listenOnTCP) {
2824 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
2825 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2826 t_fdm->removeReadFD(*i);
2827 listenOnTCP=false;
2828 }
f7c1d4e3 2829 }
5c889cf5 2830 else {
2831 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
2832 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2833 t_fdm->addReadFD(*i, handleNewTCPQuestion);
2834 listenOnTCP=true;
2835 }
f7c1d4e3
BH
2836 }
2837 }
2838 }
2839}
3f81d239 2840catch(PDNSException &ae) {
bb4bdbaf
BH
2841 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2842 return 0;
2843}
2844catch(std::exception &e) {
2845 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2846 return 0;
2847}
2848catch(...) {
2849 L<<Logger::Error<<"any other exception in main: "<<endl;
2850 return 0;
2851}
2852
51e2144e 2853
3ddb9247 2854int main(int argc, char **argv)
288f4aa9 2855{
dbd23fc2
BH
2856 g_argc = argc;
2857 g_argv = argv;
5e3de507 2858 g_stats.startupTime=time(0);
3e135495 2859 versionSetProduct(ProductRecursor);
8a63d3ce 2860 reportBasicTypes();
0007c2e5 2861 reportOtherTypes();
ea634573 2862
22030c37 2863 int ret = EXIT_SUCCESS;
caa6eefa 2864
288f4aa9 2865 try {
f888311c 2866 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 2867 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 2868 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 2869 ::arg().set("local-port","port to listen on")="53";
32252594 2870 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 2871 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 2872 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 2873 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 2874 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
d3f809bf 2875 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 2876 ::arg().setSwitch("write-pid","Write a PID file")="yes";
34162f8f 2877 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="4";
b6cfa948 2878 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
0e9d9ce2 2879 ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2e3d8a19
BH
2880 ::arg().set("chroot","switch to chroot jail")="";
2881 ::arg().set("setgid","If set, change group id to this gid for more security")="";
2882 ::arg().set("setuid","If set, change user id to this uid for more security")="";
5b0ddd18 2883 ::arg().set("network-timeout", "Wait this nummer of milliseconds for network i/o")="1500";
bb4bdbaf 2884 ::arg().set("threads", "Launch this number of threads")="2";
adabfcb9 2885 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 2886 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 2887 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976
CH
2888 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
2889 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
2890 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
2891 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
2892 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
2893 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
2894 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
69e7f117 2895 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
cc08b5a9 2896 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
2c78bd57 2897 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server")="";
2898 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
c038218b 2899 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 2900 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 2901 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
2902 ::arg().set("socket-owner","Owner of socket")="";
2903 ::arg().set("socket-group","Group of socket")="";
2904 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 2905
f0f3f0b0 2906 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
2907 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
2908 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 2909 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 2910 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 2911 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 2912 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 2913 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 2914 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 2915 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 2916 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 2917 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 2918 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 2919 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 2920 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 2921 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
7f7b8d55 2922 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
92011b8f 2923 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 2924 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 2925 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 2926 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 2927 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 2928 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 2929 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
0d5f0a9f 2930 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 2931 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 2932 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 2933 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 2934
5605c067 2935 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
2936 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
2937 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 2938 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 2939 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 2940 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
9bc8c14c 2941 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
4485aa35 2942 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 2943 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 2944 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3f975863 2945 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
966d3ba8 2946 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="";
605038a7 2947 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="";
e661a20b 2948 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 2949 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
a09a8ce0 2950 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 2951 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 2952 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 2953 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 2954 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
a09a8ce0 2955
68e6df3c 2956 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 2957 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 2958
2959 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19
BH
2960
2961 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 2962 ::arg().setCmd("version","Print version string");
d5141417 2963 ::arg().setCmd("config","Output blank configuration");
f27e6356 2964 L.toConsole(Logger::Info);
2e3d8a19 2965 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 2966
2d733c0f
CH
2967 string configname=::arg()["config-dir"]+"/recursor.conf";
2968 if(::arg()["config-name"]!="") {
2969 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 2970 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
2971 }
2972 cleanSlashes(configname);
5124de27 2973
577cf284
BH
2974 if(::arg().mustDo("config")) {
2975 cout<<::arg().configstring()<<endl;
2976 exit(0);
2977 }
2978
3ddb9247 2979 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
2980 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
2981
2e3d8a19 2982 ::arg().parse(argc,argv);
c836dc19 2983
f0f3f0b0
PL
2984 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
2985 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
2986 exit(EXIT_FAILURE);
2987 }
2988
2989 if (::arg()["socket-dir"].empty()) {
2990 if (::arg()["chroot"].empty())
2991 ::arg().set("socket-dir") = LOCALSTATEDIR;
2992 else
2993 ::arg().set("socket-dir") = "/";
2994 }
2995
2e3d8a19 2996 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 2997
61d74169 2998 if(::arg().asNum("threads")==1)
2999 ::arg().set("pdns-distributes-queries")="no";
3000
2e3d8a19 3001 if(::arg().mustDo("help")) {
ff5ba4f9
WA
3002 cout<<"syntax:"<<endl<<endl;
3003 cout<<::arg().helpstring(::arg()["help"])<<endl;
3004 exit(0);
b636533b 3005 }
5e3de507 3006 if(::arg().mustDo("version")) {
ba1a571d 3007 showProductVersion();
3613a51c 3008 showBuildConfiguration();
67076869 3009 exit(0);
5e3de507 3010 }
b636533b 3011
34162f8f 3012 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 3013
34162f8f
CH
3014 if (logUrgency < Logger::Error)
3015 logUrgency = Logger::Error;
f48d7b65 3016 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3017 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3018 }
34162f8f
CH
3019 L.setLoglevel(logUrgency);
3020 L.toConsole(logUrgency);
3021
f7c1d4e3 3022 serviceMain(argc, argv);
288f4aa9 3023 }
3f81d239 3024 catch(PDNSException &ae) {
c836dc19 3025 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 3026 ret=EXIT_FAILURE;
288f4aa9 3027 }
fdbf35ac 3028 catch(std::exception &e) {
c836dc19 3029 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 3030 ret=EXIT_FAILURE;
288f4aa9
BH
3031 }
3032 catch(...) {
c836dc19 3033 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 3034 ret=EXIT_FAILURE;
288f4aa9 3035 }
3ddb9247 3036
22030c37 3037 return ret;
288f4aa9 3038}