]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
shave off 23 mallocs per cache miss
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
183eb877 3 Copyright (C) 2003 - 2015 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
3ddb9247 6 it under the terms of the GNU General Public License version 2
f28307ad 7 as published by the Free Software Foundation
288f4aa9 8
f782fe38
MH
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
288f4aa9
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
288f4aa9 21*/
caa6eefa 22
870a0fe4
AT
23#ifdef HAVE_CONFIG_H
24#include "config.h"
25#endif
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
5a38281c 29#include <boost/foreach.hpp>
2470b36e 30#include "ws-recursor.hh"
49a699c4 31#include <pthread.h>
3ea54bf0 32#include "recpacketcache.hh"
3ddb9247 33#include "utility.hh"
51e2144e 34#include "dns_random.hh"
288f4aa9
BH
35#include <iostream>
36#include <errno.h>
81859ba5 37#include <boost/static_assert.hpp>
288f4aa9
BH
38#include <map>
39#include <set>
97bb160b 40#include "recursor_cache.hh"
38c9ceaa 41#include "cachecleaner.hh"
288f4aa9 42#include <stdio.h>
c75a6a9e 43#include <signal.h>
288f4aa9 44#include <stdlib.h>
bb4bdbaf 45#include "misc.hh"
288f4aa9
BH
46#include "mtasker.hh"
47#include <utility>
288f4aa9
BH
48#include "arguments.hh"
49#include "syncres.hh"
88def049
BH
50#include <fcntl.h>
51#include <fstream>
5c633640
BH
52#include "sstuff.hh"
53#include <boost/tuple/tuple.hpp>
54#include <boost/tuple/tuple_comparison.hpp>
72df400f 55#include <boost/shared_array.hpp>
ea634573 56#include <boost/lexical_cast.hpp>
7f1fa77d 57#include <boost/function.hpp>
5605c067 58#include <boost/algorithm/string.hpp>
40a3dd64 59#include <netinet/tcp.h>
ea634573
BH
60#include "dnsparser.hh"
61#include "dnswriter.hh"
62#include "dnsrecords.hh"
f814d7c8 63#include "zoneparser-tng.hh"
1d5b3ce6 64#include "rec_channel.hh"
aaacf7f2 65#include "logger.hh"
c8ddb7c2 66#include "iputils.hh"
09e6702a 67#include "mplexer.hh"
c038218b 68#include "config.h"
5704e107 69#include "lua-recursor.hh"
ba1a571d 70#include "version.hh"
79332bff 71#include "responsestats.hh"
d67620e4 72#include "secpoll-recursor.hh"
c5c066bf 73#include "dnsname.hh"
644dd1da 74#include "filterpo.hh"
75#include "rpzloader.hh"
a2bfc3ff
BH
76#ifndef RECURSOR
77#include "statbag.hh"
78StatBag S;
79#endif
80
bb4bdbaf 81__thread FDMultiplexer* t_fdm;
674cf0f6 82__thread unsigned int t_id;
09e6702a 83unsigned int g_maxTCPPerClient;
5b0ddd18 84unsigned int g_networkTimeoutMsec;
08f3f638 85uint64_t g_latencyStatSize;
09e6702a 86bool g_logCommonErrors;
e661a20b 87bool g_anyToTcp;
a09a8ce0 88uint16_t g_udpTruncationThreshold;
5704e107 89__thread shared_ptr<RecursorLua>* t_pdl;
60c8afa8 90
91__thread addrringbuf_t* t_remotes, *t_servfailremotes, *t_largeanswerremotes;
92
c5c066bf 93__thread boost::circular_buffer<pair<DNSName, uint16_t> >* t_queryring, *t_servfailqueryring;
77499b05 94__thread shared_ptr<Regex>* t_traceRegex;
674cf0f6 95
376effcf 96NetmaskGroup g_ednssubnets;
97SuffixMatchNode g_ednsdomains;
98
644dd1da 99DNSFilterEngine g_dfe;
100
d7dae798
BH
101RecursorControlChannel s_rcc; // only active in thread 0
102
103// for communicating with our threads
49a699c4
BH
104struct ThreadPipeSet
105{
106 int writeToThread;
107 int readToThread;
108 int writeFromThread;
109 int readFromThread;
110};
3ea54bf0 111
d7dae798 112vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
5c633640 113
d7dae798 114SyncRes::domainmap_t* g_initialDomainMap; // new threads needs this to be setup
49a699c4
BH
115
116#include "namespaces.hh"
3ea54bf0 117
49a699c4 118__thread MemRecursorCache* t_RC;
16beeaa4 119__thread RecursorPacketCache* t_packetCache;
1d5b3ce6
BH
120RecursorStats g_stats;
121bool g_quiet;
49a699c4 122
1bc3c142
BH
123bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
124
41942bb3 125__thread NetmaskGroup* t_allowFrom;
49a699c4
BH
126static NetmaskGroup* g_initialAllowFrom; // new thread needs to be setup with this
127
eb5bae86 128NetmaskGroup* g_dontQuery;
2d733c0f 129string s_programname="pdns_recursor";
49a699c4 130
40a3dd64
BH
131typedef vector<int> tcpListenSockets_t;
132tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
3159c9ef 133int g_tcpTimeout;
85c32340 134unsigned int g_maxMThreads;
183eb877 135__thread struct timeval g_now; // timestamp, updated (too) frequently
84433b79 136typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
137listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
18af64a8 138
d7dae798
BH
139__thread MT_t* MT; // the big MTasker
140
f8f243b0 141unsigned int g_numThreads, g_numWorkerThreads;
c3828c03 142
12cd44ee 143#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
144// Bad Nets taken from both:
3ddb9247 145// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 146// and
147// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
148// where such a network may not be considered a valid destination
149#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
150#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 151
d7dae798 152//! used to send information to a newborn mthread
ea634573 153struct DNSComboWriter {
3ddb9247 154 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now),
232f0877 155 d_tcp(false), d_socket(-1)
ea634573
BH
156 {}
157 MOADNSParser d_mdp;
00c9b8c1 158 void setRemote(const ComboAddress* sa)
ea634573 159 {
37d3f960 160 d_remote=*sa;
ea634573
BH
161 }
162
b71b60ee 163 void setLocal(const ComboAddress& sa)
164 {
165 d_local=sa;
166 }
167
168
ea634573
BH
169 void setSocket(int sock)
170 {
171 d_socket=sock;
172 }
a1754c6a
BH
173
174 string getRemote() const
175 {
37d3f960 176 return d_remote.toString();
a1754c6a
BH
177 }
178
c9e9e5e0 179 struct timeval d_now;
b71b60ee 180 ComboAddress d_remote, d_local;
ea634573
BH
181 bool d_tcp;
182 int d_socket;
cd989c87 183 shared_ptr<TCPConnection> d_tcpConnection;
ea634573
BH
184};
185
186
288f4aa9
BH
187ArgvMap &arg()
188{
189 static ArgvMap theArg;
190 return theArg;
191}
4ef015cd 192
09e6702a 193
d8f6d49f 194void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 195
50c81227 196// -1 is error, 0 is timeout, 1 is success
3ddb9247 197int asendtcp(const string& data, Socket* sock)
5c633640
BH
198{
199 PacketID pident;
200 pident.sock=sock;
201 pident.outMSG=data;
3ddb9247 202
bb4bdbaf 203 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 204 string packet;
5c633640 205
5b0ddd18 206 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 207
9170fbaf 208 if(!ret || ret==-1) { // timeout
bb4bdbaf 209 t_fdm->removeWriteFD(sock->getHandle());
5c633640 210 }
50c81227
BH
211 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
212 return -1;
213 }
9170fbaf 214 return ret;
5c633640
BH
215}
216
d8f6d49f 217void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 218
9170fbaf 219// -1 is error, 0 is timeout, 1 is success
825fa717 220int arecvtcp(string& data, int len, Socket* sock, bool incompleteOkay)
288f4aa9 221{
50c81227 222 data.clear();
5c633640
BH
223 PacketID pident;
224 pident.sock=sock;
225 pident.inNeeded=len;
825fa717 226 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 227 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 228
bb4bdbaf 229 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 230 if(!ret || ret==-1) { // timeout
bb4bdbaf 231 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 232 }
50c81227
BH
233 else if(data.empty()) {// error, EOF or other
234 return -1;
235 }
236
9170fbaf 237 return ret;
288f4aa9
BH
238}
239
fba1e944 240void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 241{
fba1e944 242 PacketID pident=*any_cast<PacketID>(&var);
4465e941 243 char resp[512];
244 int ret=recv(fd, resp, sizeof(resp), 0);
245 t_fdm->removeReadFD(fd);
246 if(ret >= 0) {
247 string data(resp, ret);
fba1e944 248 MT->sendEvent(pident, &data);
4465e941 249 }
250 else {
fba1e944 251 string empty;
252 MT->sendEvent(pident, &empty);
253 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 254 }
255}
fba1e944 256string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 257{
4465e941 258 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
259 s.setNonBlocking();
260 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
261
262 s.bind(local);
263 s.connect(dest);
4465e941 264 s.send(query);
265
266 PacketID pident;
267 pident.sock=&s;
268 pident.type=0;
fba1e944 269 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 270
271 string data;
fba1e944 272
4465e941 273 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 274
4465e941 275 if(!ret || ret==-1) { // timeout
4465e941 276 t_fdm->removeReadFD(s.getHandle());
277 }
278 else if(data.empty()) {// error, EOF or other
fba1e944 279 // we could special case this
4465e941 280 return data;
281 }
4465e941 282 return data;
283}
284
285
3ddb9247 286vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
046c5a5d 287const ComboAddress g_local4("0.0.0.0"), g_local6("::");
1652a63e 288
d7dae798 289//! pick a random query local address
1652a63e 290ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 291{
1652a63e 292 ComboAddress ret;
5a38281c 293 if(family==AF_INET) {
3ddb9247 294 if(g_localQueryAddresses4.empty())
1652a63e 295 ret = g_local4;
3ddb9247 296 else
1652a63e
BH
297 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
298 ret.sin4.sin_port = htons(port);
5a38281c
BH
299 }
300 else {
301 if(g_localQueryAddresses6.empty())
1652a63e
BH
302 ret = g_local6;
303 else
304 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 305
1652a63e 306 ret.sin6.sin6_port = htons(port);
5a38281c 307 }
1652a63e 308 return ret;
5a38281c 309}
4ef015cd 310
d8f6d49f 311void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 312
d7dae798
BH
313void setSocketBuffer(int fd, int optname, uint32_t size)
314{
315 uint32_t psize=0;
316 socklen_t len=sizeof(psize);
3ddb9247 317
d7dae798
BH
318 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
319 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 320 return;
d7dae798
BH
321 }
322
323 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
c057bfaa 324 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
325}
326
327
328static void setSocketReceiveBuffer(int fd, uint32_t size)
329{
330 setSocketBuffer(fd, SO_RCVBUF, size);
331}
332
333static void setSocketSendBuffer(int fd, uint32_t size)
334{
335 setSocketBuffer(fd, SO_SNDBUF, size);
336}
337
338
4ef015cd
BH
339// you can ask this class for a UDP socket to send a query from
340// this socket is not yours, don't even think about deleting it
341// but after you call 'returnSocket' on it, don't assume anything anymore
342class UDPClientSocks
343{
4ef015cd 344 unsigned int d_numsocks;
4ef015cd 345public:
e2642526 346 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
347 {
348 }
349
996c89cc 350 typedef set<int> socks_t;
4ef015cd
BH
351 socks_t d_socks;
352
d8f6d49f
BH
353 // returning -1 means: temporary OS error (ie, out of files), -2 means OS error
354 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 355 {
d8f6d49f
BH
356 *fd=makeClientSocket(toaddr.sin4.sin_family);
357 if(*fd < 0) // temporary error - receive exception otherwise
358 return -1;
359
360 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
361 int err = errno;
41ff43f8 362 // returnSocket(*fd);
3897b9e1 363 closesocket(*fd);
d8f6d49f 364 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 365 return -2;
998a4334 366 return -1;
d8f6d49f 367 }
998a4334 368
d8f6d49f 369 d_socks.insert(*fd);
998a4334 370 d_numsocks++;
d8f6d49f 371 return 0;
4ef015cd
BH
372 }
373
095c3045
BH
374 void returnSocket(int fd)
375 {
376 socks_t::iterator i=d_socks.find(fd);
34801ab1 377 if(i==d_socks.end()) {
3f81d239 378 throw PDNSException("Trying to return a socket (fd="+lexical_cast<string>(fd)+") not in the pool");
34801ab1 379 }
bb4bdbaf 380 returnSocketLocked(i);
095c3045
BH
381 }
382
4ef015cd 383 // return a socket to the pool, or simply erase it
bb4bdbaf 384 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 385 {
600fc20b 386 if(i==d_socks.end()) {
3f81d239 387 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 388 }
80baf329 389 try {
bb4bdbaf 390 t_fdm->removeReadFD(*i);
80baf329
BH
391 }
392 catch(FDMultiplexerException& e) {
bb4bdbaf 393 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 394 }
3897b9e1 395 closesocket(*i);
3ddb9247 396
998a4334
BH
397 d_socks.erase(i++);
398 --d_numsocks;
4ef015cd 399 }
d8f6d49f
BH
400
401 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 402 static int makeClientSocket(int family)
d8f6d49f 403 {
a903b39c 404 int ret=(int)socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 405
d8f6d49f
BH
406 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
407 return ret;
3ddb9247
PD
408
409 if(ret<0)
3f81d239 410 throw PDNSException("Making a socket for resolver (family = "+lexical_cast<string>(family)+"): "+stringerror());
36855b53 411
3897b9e1 412 setCloseOnExec(ret);
5a38281c 413
d8f6d49f 414 int tries=10;
3aa91c3e 415 ComboAddress sin;
d8f6d49f 416 while(--tries) {
1652a63e 417 uint16_t port;
3ddb9247 418
d8f6d49f 419 if(tries==1) // fall back to kernel 'random'
4957a608 420 port = 0;
1652a63e
BH
421 else
422 port = 1025 + dns_random(64510);
5a38281c 423
3aa91c3e 424 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 425
3ddb9247 426 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 427 break;
d8f6d49f
BH
428 }
429 if(!tries)
3aa91c3e 430 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 431
3897b9e1 432 setNonBlocking(ret);
d8f6d49f
BH
433 return ret;
434 }
49a699c4
BH
435};
436
437static __thread UDPClientSocks* t_udpclientsocks;
4ef015cd 438
288f4aa9 439/* these two functions are used by LWRes */
34801ab1 440// -2 is OS error, -1 is error that depends on the remote, > 0 is success
3ddb9247
PD
441int asendto(const char *data, int len, int flags,
442 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 443{
34801ab1
BH
444
445 PacketID pident;
787e5eab
BH
446 pident.domain = domain;
447 pident.remote = toaddr;
448 pident.type = qtype;
34801ab1
BH
449
450 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
451 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
452
453 for(; chain.first != chain.second; chain.first++) {
454 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 455 /*
4665c31e
BH
456 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
457 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 458 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 459 */
34801ab1
BH
460 chain.first->key.chain.insert(id); // we can chain
461 *fd=-1; // gets used in waitEvent / sendEvent later on
462 return 1;
463 }
464 }
465
49a699c4 466 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
467 if(ret < 0)
468 return ret;
34801ab1 469
998a4334
BH
470 pident.fd=*fd;
471 pident.id=id;
3ddb9247 472
bb4bdbaf
BH
473 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
474 ret = send(*fd, data, len, 0);
475
5b0ddd18 476 int tmp = errno;
bb4bdbaf 477
7302ed0a 478 if(ret < 0)
49a699c4 479 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 480
5b0ddd18 481 errno = tmp; // this is for logging purposes only
7302ed0a 482 return ret;
288f4aa9
BH
483}
484
9170fbaf 485// -1 is error, 0 is timeout, 1 is success
3ddb9247 486int arecvfrom(char *data, int len, int flags, const ComboAddress& fromaddr, int *d_len,
c5c066bf 487 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 488{
0d5f0a9f 489 static optional<unsigned int> nearMissLimit;
3ddb9247 490 if(!nearMissLimit)
0d5f0a9f
BH
491 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
492
288f4aa9 493 PacketID pident;
4ef015cd 494 pident.fd=fd;
288f4aa9 495 pident.id=id;
0d5f0a9f 496 pident.domain=domain;
787e5eab 497 pident.type = qtype;
996c89cc 498 pident.remote=fromaddr;
b636533b 499
288f4aa9 500 string packet;
5b0ddd18 501 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 502
9170fbaf 503 if(ret > 0) {
996c89cc 504 if(packet.empty()) // means "error"
3ddb9247 505 return -1;
998a4334 506
705f31ae 507 *d_len=(int)packet.size();
9170fbaf 508 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 509 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 510 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 511 g_stats.spoofCount++;
35ce8576
BH
512 return -1;
513 }
288f4aa9 514 }
09e6702a 515 else {
34801ab1 516 if(fd >= 0)
49a699c4 517 t_udpclientsocks->returnSocket(fd);
09e6702a 518 }
9170fbaf 519 return ret;
288f4aa9
BH
520}
521
aa4e4cbf 522
87a5ea63 523string s_pidfname;
88def049
BH
524static void writePid(void)
525{
191f2e47 526 if(!::arg().mustDo("write-pid"))
527 return;
18e7758c 528 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 529 if(of)
705f31ae 530 of<< Utility::getpid() <<endl;
88def049 531 else
c057bfaa 532 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
533}
534
bd0289fc
BH
535typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
536tcpClientCounts_t __thread* t_tcpClientCounts;
0e9d9ce2 537
cd989c87 538TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
539{
540 ++s_currentConnections;
cd989c87 541 (*t_tcpClientCounts)[d_remote]++;
0e408828 542}
cd989c87
BH
543
544TCPConnection::~TCPConnection()
0e408828 545{
3ddb9247 546 if(closesocket(d_fd) < 0)
cd989c87 547 unixDie("closing socket for TCPConnection");
3ddb9247 548 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 549 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 550 --s_currentConnections;
0e408828 551}
0e9d9ce2 552
3ddb9247 553AtomicCounter TCPConnection::s_currentConnections;
d8f6d49f 554void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 555
92011b8f 556// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
c5c066bf 557void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 558{
92011b8f 559 if(packetsize > 1000 && t_largeanswerremotes)
560 t_largeanswerremotes->push_back(remote);
2cc13433
BH
561 switch(res) {
562 case RCode::ServFail:
92011b8f 563 if(t_servfailremotes) {
564 t_servfailremotes->push_back(remote);
565 if(query) // packet cache
566 t_servfailqueryring->push_back(make_pair(*query, qtype));
567 }
2cc13433
BH
568 g_stats.servFails++;
569 break;
570 case RCode::NXDomain:
571 g_stats.nxDomains++;
572 break;
573 case RCode::NoError:
574 g_stats.noErrors++;
575 break;
576 }
577}
578
a903b39c 579static string makeLoginfo(DNSComboWriter* dc)
580try
581{
c5c066bf 582 return "("+dc->d_mdp.d_qname.toString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
a903b39c 583}
584catch(...)
585{
586 return "Exception making error message for exception";
587}
588
288f4aa9
BH
589void startDoResolve(void *p)
590{
7b1469bb 591 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 592 try {
92011b8f 593 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
594
b18ace73 595 uint32_t maxanswersize= dc->d_tcp ? 65535 : min((uint16_t) 512, g_udpTruncationThreshold);
7f7b8d55 596 EDNSOpts edo;
56b4d21b 597 if(getEDNSOpts(dc->d_mdp, &edo) && !dc->d_tcp) {
b18ace73 598 maxanswersize = min(edo.d_packetsize, g_udpTruncationThreshold);
10321a98 599 }
3ddb9247 600 ComboAddress local;
84433b79 601 listenSocketsAddresses_t::const_iterator lociter;
e325f20c 602 vector<DNSRecord> ret;
ea634573 603 vector<uint8_t> packet;
b23b8614 604
3ddb9247 605 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
606
607 pw.getHeader()->aa=0;
608 pw.getHeader()->ra=1;
c154c8a4 609 pw.getHeader()->qr=1;
bb4bdbaf 610 pw.getHeader()->tc=0;
ea634573 611 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 612 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 613 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 614
904d3219
PD
615 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
616
617 SyncRes sr(dc->d_now);
3457a2a0 618 if(t_pdl) {
619 sr.setLuaEngine(*t_pdl);
4ea94941 620 sr.d_requestor=dc->d_remote;
3457a2a0 621 }
57769f13 622
623 if(pw.getHeader()->cd || edo.d_Z & EDNSOpts::DNSSECOK)
624 sr.d_doDNSSEC=true;
625
904d3219
PD
626 bool tracedQuery=false; // we could consider letting Lua know about this too
627 bool variableAnswer = false;
628
56b4d21b 629 int res;
39ec5d29 630 DNSFilterEngine::Policy dfepol;
631 DNSRecord spoofed;
e661a20b 632 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
633 pw.getHeader()->tc = 1;
634 res = 0;
635 variableAnswer = true;
e661a20b
PD
636 goto sendit;
637 }
638
c5c066bf 639 if(t_traceRegex->get() && (*t_traceRegex)->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
640 sr.setLogMode(SyncRes::Store);
641 tracedQuery=true;
642 }
3ddb9247 643
77499b05 644 if(!g_quiet || tracedQuery)
461df9d2 645 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
8a63d3ce 646 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
c75a6a9e 647
fededf47 648 sr.setId(MT->getTid());
67828389 649 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
650 sr.setCacheOnly();
651
84433b79 652 local.sin4.sin_family = dc->d_remote.sin4.sin_family;
653
654 lociter = g_listenSocketsAddresses.find(dc->d_socket);
655 if(lociter != g_listenSocketsAddresses.end()) {
656 local = lociter->second;
657 }
658 else {
659 socklen_t len = local.getSocklen();
660 getsockname(dc->d_socket, (sockaddr*)&local, &len); // if this fails, we're ok with it
661 }
662
3ddb9247 663 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
e325f20c 664
39ec5d29 665 dfepol = g_dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote);
666
667 switch(dfepol.d_kind) {
668 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 669 break;
39ec5d29 670 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 671 g_stats.policyDrops++;
672 delete dc;
673 dc=0;
674 return;
39ec5d29 675 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 676 res=RCode::NXDomain;
677 goto haveAnswer;
678
39ec5d29 679 case DNSFilterEngine::PolicyKind::NODATA:
680 res=RCode::NoError;
681 goto haveAnswer;
682
683 case DNSFilterEngine::PolicyKind::Custom:
644dd1da 684 res=RCode::NoError;
39ec5d29 685 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 686 spoofed.d_type=dfepol.d_custom->getType();
39ec5d29 687 spoofed.d_ttl = 1234;
688 spoofed.d_class = 1;
689 spoofed.d_content = dfepol.d_custom;
589ad24b 690 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 691 ret.push_back(spoofed);
644dd1da 692 goto haveAnswer;
693
39ec5d29 694
695 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 696 if(!dc->d_tcp) {
697 res=RCode::NoError;
698 pw.getHeader()->tc=1;
699 goto haveAnswer;
700 }
701 break;
702 }
703
84433b79 704 if(!t_pdl->get() || !(*t_pdl)->preresolve(dc->d_remote, local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer)) {
44971ca0
PD
705 try {
706 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
707 }
708 catch(ImmediateServFailException &e) {
854d44e3 709 if(g_logCommonErrors)
710 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
711 res = RCode::ServFail;
712 }
4485aa35 713
39ec5d29 714 dfepol = g_dfe.getPostPolicy(ret);
715 switch(dfepol.d_kind) {
716 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 717 break;
39ec5d29 718 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 719 g_stats.policyDrops++;
720 delete dc;
721 dc=0;
722 return;
39ec5d29 723 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 724 ret.clear();
725 res=RCode::NXDomain;
726 goto haveAnswer;
727
39ec5d29 728 case DNSFilterEngine::PolicyKind::NODATA:
644dd1da 729 ret.clear();
730 res=RCode::NoError;
731 goto haveAnswer;
732
39ec5d29 733 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 734 if(!dc->d_tcp) {
735 ret.clear();
736 res=RCode::NoError;
737 pw.getHeader()->tc=1;
738 goto haveAnswer;
739 }
740 break;
39ec5d29 741
742 case DNSFilterEngine::PolicyKind::Custom:
743 res=RCode::NoError;
744 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 745 spoofed.d_type=dfepol.d_custom->getType();
39ec5d29 746 spoofed.d_ttl = 1234;
747 spoofed.d_class = 1;
748 spoofed.d_content = dfepol.d_custom;
589ad24b 749 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 750 ret.push_back(spoofed);
751 goto haveAnswer;
644dd1da 752 }
753
674cf0f6 754 if(t_pdl->get()) {
bd53ea9d 755 if(res == RCode::NoError) {
e325f20c 756 auto i=ret.cbegin();
757 for(; i!= ret.cend(); ++i)
e693ff5a 758 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
232f0877 759 break;
e325f20c 760 if(i == ret.cend())
84433b79 761 (*t_pdl)->nodata(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
232f0877
CH
762 }
763 else if(res == RCode::NXDomain)
644dd1da 764 (*t_pdl)->nxdomain(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
765
766
767 (*t_pdl)->postresolve(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
d2322a5e 768 }
4485aa35 769 }
644dd1da 770 haveAnswer:;
3e8216c8 771 if(res == PolicyDecision::DROP) {
e9c2ad3a 772 g_stats.policyDrops++;
ae7e77ad 773 delete dc;
774 dc=0;
775 return;
3ddb9247 776 }
3e8216c8 777 if(tracedQuery || res == PolicyDecision::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 778 {
85ffbc53
PD
779 string trace(sr.getTrace());
780 if(!trace.empty()) {
781 vector<string> lines;
782 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 783 for(const string& line : lines) {
85ffbc53
PD
784 if(!line.empty())
785 L<<Logger::Warning<< line << endl;
786 }
787 }
788 }
3ddb9247 789
3e8216c8 790 if(res == PolicyDecision::PASS) {
0fe1d080
PD
791 pw.getHeader()->rcode=RCode::ServFail;
792 // no commit here, because no record
793 g_stats.servFails++;
794 }
288f4aa9 795 else {
ea634573 796 pw.getHeader()->rcode=res;
92011b8f 797
c154c8a4 798 if(ret.size()) {
92476c8b 799 orderAndShuffle(ret);
e325f20c 800 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
e693ff5a 801 pw.startRecord(i->d_name, i->d_type, i->d_ttl, i->d_class, i->d_place);
e325f20c 802 minTTL = min(minTTL, i->d_ttl);
803 i->d_content->toPacket(pw);
dffbaa08 804 if(pw.size() > maxanswersize) {
4957a608 805 pw.rollback();
e693ff5a 806 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
add935a2 807 {
4957a608 808 pw.getHeader()->tc=1;
add935a2
PD
809 pw.truncate();
810 }
4957a608
BH
811 goto sendit; // need to jump over pw.commit
812 }
813 }
b23b8614 814
18af64a8 815 pw.commit();
ea634573 816 }
288f4aa9 817 }
10321a98 818 sendit:;
79332bff 819 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
92011b8f 820 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ea634573 821 if(!dc->d_tcp) {
b71b60ee 822 struct msghdr msgh;
823 struct iovec iov;
824 char cbuf[256];
825 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
826 if(dc->d_local.sin4.sin_family)
827 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local);
579cae19
PD
828 else
829 msgh.msg_control=NULL;
b71b60ee 830 sendmsg(dc->d_socket, &msgh, 0);
3762e821 831 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
79332bff 832 t_packetCache->insertResponsePacket(string((const char*)&*packet.begin(), packet.size()),
3ddb9247
PD
833 g_now.tv_sec,
834 min(minTTL,
79332bff 835 (pw.getHeader()->rcode == RCode::ServFail) ? SyncRes::s_packetcacheservfailttl : SyncRes::s_packetcachettl
3ddb9247 836 )
79332bff 837 );
1051f8a9 838 }
3762e821 839 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 840 }
9c495589
BH
841 else {
842 char buf[2];
ea634573
BH
843 buf[0]=packet.size()/256;
844 buf[1]=packet.size()%256;
feccc9fc 845
c038218b 846 Utility::iovec iov[2];
feccc9fc 847
ea634573
BH
848 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
849 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 850
c038218b 851 int ret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 852 bool hadError=true;
feccc9fc 853
3ddb9247 854 if(ret == 0)
18af64a8 855 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
3ddb9247 856 else if(ret < 0 )
18af64a8 857 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
ea634573 858 else if((unsigned int)ret != 2 + packet.size())
18af64a8 859 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
0e9d9ce2 860 else
18af64a8 861 hadError=false;
3ddb9247 862
09e6702a 863 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 864
09e6702a 865 if(hadError) {
18af64a8 866 // no need to remove us from FDM, we weren't there
c36bc97a 867 dc->d_socket = -1;
09e6702a 868 }
a6ae6414 869 else {
cd989c87 870 dc->d_tcpConnection->state=TCPConnection::BYTE0;
18af64a8 871 Utility::gettimeofday(&g_now, 0); // needs to be updated
cd989c87
BH
872 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
873 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
0e9d9ce2 874 }
9c495589 875 }
3ddb9247 876
1d5b3ce6 877 if(!g_quiet) {
461df9d2 878 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 879 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
9de3e034 880 sr.d_totUsec/1000.0<<" ms, "<<
881 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e 882 }
b23b8614 883
3ddb9247 884 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
fe213470
BH
885 float spent=makeFloat(sr.d_now-dc->d_now);
886 if(spent < 0.001)
887 g_stats.answers0_1++;
888 else if(spent < 0.010)
889 g_stats.answers1_10++;
890 else if(spent < 0.1)
891 g_stats.answers10_100++;
892 else if(spent < 1.0)
893 g_stats.answers100_1000++;
894 else
895 g_stats.answersSlow++;
896
574af7ea 897 uint64_t newLat=(uint64_t)(spent*1000000);
08f3f638 898 newLat = min(newLat,(uint64_t)(g_networkTimeoutMsec*1000)); // outliers of several minutes exist..
899 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 900 // no worries, we do this for packet cache hits elsewhere
c6d04bdc 901 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 902 delete dc;
c36bc97a 903 dc=0;
288f4aa9 904 }
3f81d239 905 catch(PDNSException &ae) {
a903b39c 906 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 907 delete dc;
288f4aa9 908 }
7b1469bb 909 catch(MOADNSException& e) {
a903b39c 910 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 911 delete dc;
7b1469bb 912 }
fdbf35ac 913 catch(std::exception& e) {
a903b39c 914 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what()<<endl;
c36bc97a 915 delete dc;
c154c8a4 916 }
288f4aa9 917 catch(...) {
a903b39c 918 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 919 }
3ddb9247 920
ec6eacbc 921 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
922}
923
677e2a46 924void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 925{
2d733c0f 926 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46
BH
927 if(processNum >= 0)
928 sockname += "."+lexical_cast<string>(processNum);
929 sockname+=".controlsocket";
41f7a068 930 s_rcc.listen(sockname);
3ddb9247 931
387de317
BH
932 int sockowner = -1;
933 int sockgroup = -1;
934
935 if (!::arg().isEmpty("socket-group"))
936 sockgroup=::arg().asGid("socket-group");
937 if (!::arg().isEmpty("socket-owner"))
938 sockowner=::arg().asUid("socket-owner");
3ddb9247 939
f838ad8d
BH
940 if (sockgroup > -1 || sockowner > -1) {
941 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
942 unixDie("Failed to chown control socket");
943 }
944 }
387de317
BH
945
946 // do mode change if socket-mode is given
947 if(!::arg().isEmpty("socket-mode")) {
948 mode_t sockmode=::arg().asMode("socket-mode");
949 chmod(sockname.c_str(), sockmode);
950 }
1d5b3ce6
BH
951}
952
d8f6d49f 953void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 954{
cd989c87 955 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 956
879b3f70 957 if(conn->state==TCPConnection::BYTE0) {
cd989c87 958 int bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 959 if(bytes==1)
667f7e60 960 conn->state=TCPConnection::BYTE1;
3ddb9247 961 if(bytes==2) {
a0aa4f64 962 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
963 conn->bytesread=0;
964 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
965 }
966 if(!bytes || bytes < 0) {
bb4bdbaf 967 t_fdm->removeReadFD(fd);
09e6702a
BH
968 return;
969 }
970 }
667f7e60 971 else if(conn->state==TCPConnection::BYTE1) {
cd989c87 972 int bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 973 if(bytes==1) {
667f7e60 974 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 975 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 976 conn->bytesread=0;
09e6702a
BH
977 }
978 if(!bytes || bytes < 0) {
979 if(g_logCommonErrors)
cd989c87 980 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 981 t_fdm->removeReadFD(fd);
09e6702a
BH
982 return;
983 }
984 }
667f7e60 985 else if(conn->state==TCPConnection::GETQUESTION) {
cd989c87 986 int bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
09e6702a 987 if(!bytes || bytes < 0) {
cd989c87 988 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 989 t_fdm->removeReadFD(fd);
09e6702a
BH
990 return;
991 }
667f7e60
BH
992 conn->bytesread+=bytes;
993 if(conn->bytesread==conn->qlen) {
bb4bdbaf 994 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 995
09e6702a
BH
996 DNSComboWriter* dc=0;
997 try {
cd989c87 998 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
999 }
1000 catch(MOADNSException &mde) {
3ddb9247 1001 g_stats.clientParseError++;
4957a608 1002 if(g_logCommonErrors)
cd989c87 1003 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 1004 return;
09e6702a 1005 }
cd989c87
BH
1006 dc->d_tcpConnection = conn; // carry the torch
1007 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1008 dc->d_tcp=true;
cd989c87 1009 dc->setRemote(&conn->d_remote);
879b3f70 1010 if(dc->d_mdp.d_header.qr) {
4957a608 1011 delete dc;
4328f463 1012 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
4957a608 1013 return;
879b3f70 1014 }
3abcdab2
PD
1015 if(dc->d_mdp.d_header.opcode) {
1016 delete dc;
4328f463 1017 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
3abcdab2
PD
1018 return;
1019 }
09e6702a 1020 else {
4957a608
BH
1021 ++g_stats.qcounter;
1022 ++g_stats.tcpqcounter;
50a5ef72 1023 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1024 return;
09e6702a
BH
1025 }
1026 }
1027 }
1028}
1029
6dcd28c3 1030//! Handle new incoming TCP connection
d8f6d49f 1031void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1032{
37d3f960 1033 ComboAddress addr;
09e6702a 1034 socklen_t addrlen=sizeof(addr);
705f31ae 1035 int newsock=(int)accept(fd, (struct sockaddr*)&addr, &addrlen);
09e6702a 1036 if(newsock>0) {
85c32340
BH
1037 if(MT->numProcesses() > g_maxMThreads) {
1038 g_stats.overCapacityDrops++;
3897b9e1 1039 closesocket(newsock);
85c32340
BH
1040 return;
1041 }
1042
92011b8f 1043 if(t_remotes)
1044 t_remotes->push_back(addr);
49a699c4 1045 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1046 if(!g_quiet)
4957a608 1047 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1048
09e6702a 1049 g_stats.unauthorizedTCP++;
3897b9e1 1050 closesocket(newsock);
09e6702a
BH
1051 return;
1052 }
bd0289fc 1053 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1054 g_stats.tcpClientOverflow++;
3897b9e1 1055 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
09e6702a
BH
1056 return;
1057 }
3ddb9247 1058
3897b9e1 1059 setNonBlocking(newsock);
cd989c87
BH
1060 shared_ptr<TCPConnection> tc(new TCPConnection(newsock, addr));
1061 tc->state=TCPConnection::BYTE0;
3ddb9247 1062
cd989c87 1063 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1064
0bff046b 1065 struct timeval now;
c038218b 1066 Utility::gettimeofday(&now, 0);
cd989c87 1067 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1068 }
1069}
3ddb9247 1070
b71b60ee 1071string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1072{
183eb877 1073 gettimeofday(&g_now, 0);
b71b60ee 1074 struct timeval diff = g_now - tv;
1075 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1076
22cf1fda 1077 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1078 g_stats.tooOldDrops++;
1079 return 0;
1080 }
1081
1bc3c142 1082 ++g_stats.qcounter;
d7f10541
BH
1083 if(fromaddr.sin4.sin_family==AF_INET6)
1084 g_stats.ipv6qcounter++;
1bc3c142
BH
1085
1086 string response;
1087 try {
1088 uint32_t age;
1089 if(!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(question, g_now.tv_sec, &response, &age)) {
1090 if(!g_quiet)
d738f00f 1091 L<<Logger::Notice<<t_id<< " question answered from packet cache from "<<fromaddr.toString()<<endl;
92011b8f 1092 // t_queryring->push_back("packetcached");
3ddb9247 1093
1bc3c142
BH
1094 g_stats.packetCacheHits++;
1095 SyncRes::s_queries++;
1096 ageDNSPacket(response, age);
b71b60ee 1097 struct msghdr msgh;
1098 struct iovec iov;
1099 char cbuf[256];
1100 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1101 if(destaddr.sin4.sin_family) {
b71b60ee 1102 addCMsgSrcAddr(&msgh, cbuf, &destaddr);
1103 }
579cae19
PD
1104 else {
1105 msgh.msg_control=NULL;
1106 }
b71b60ee 1107 sendmsg(fd, &msgh, 0);
1108
97bee66d
BH
1109 if(response.length() >= sizeof(struct dnsheader)) {
1110 struct dnsheader dh;
1111 memcpy(&dh, response.c_str(), sizeof(dh));
92011b8f 1112 updateResponseStats(dh.rcode, fromaddr, response.length(), 0, 0);
97bee66d 1113 }
08f3f638 1114 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1bc3c142
BH
1115 return 0;
1116 }
3ddb9247 1117 }
1bc3c142
BH
1118 catch(std::exception& e) {
1119 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1120 return 0;
1121 }
3ddb9247 1122
4ea94941 1123 if(t_pdl->get()) {
1124 if((*t_pdl)->ipfilter(fromaddr, destaddr)) {
1125 if(!g_quiet)
1126 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1127 g_stats.policyDrops++;
1128 return 0;
1129 }
1130 }
1131
1bc3c142 1132 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 1133 if(!g_quiet)
854d44e3 1134 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
461df9d2 1135
1bc3c142
BH
1136 g_stats.overCapacityDrops++;
1137 return 0;
1138 }
3ddb9247 1139
1bc3c142
BH
1140 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1141 dc->setSocket(fd);
1142 dc->setRemote(&fromaddr);
b71b60ee 1143 dc->setLocal(destaddr);
1bc3c142
BH
1144
1145 dc->d_tcp=false;
1146 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1147 return 0;
3ddb9247
PD
1148}
1149
b71b60ee 1150
d8f6d49f 1151void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 1152{
a9af3782 1153 int len;
5db529f8
BH
1154 char data[1500];
1155 ComboAddress fromaddr;
b71b60ee 1156 struct msghdr msgh;
1157 struct iovec iov;
1158 char cbuf[256];
1159
1160 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1161 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1162
3ddb9247 1163 for(;;)
b71b60ee 1164 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
92011b8f 1165 if(t_remotes)
1166 t_remotes->push_back(fromaddr);
b23b8614 1167
49a699c4 1168 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 1169 if(!g_quiet)
4957a608 1170 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1171
5db529f8 1172 g_stats.unauthorizedUDP++;
a9af3782 1173 return;
5db529f8 1174 }
15c01deb 1175 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 1176 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 1177 if(!g_quiet)
81859ba5 1178 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1179
1180 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1181 return;
1182 }
5db529f8 1183 try {
b23b8614 1184 dnsheader* dh=(dnsheader*)data;
3ddb9247 1185
b23b8614 1186 if(dh->qr) {
4957a608
BH
1187 if(g_logCommonErrors)
1188 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 1189 }
3abcdab2
PD
1190 else if(dh->opcode) {
1191 if(g_logCommonErrors)
1192 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1193 }
5db529f8 1194 else {
232f0877 1195 string question(data, len);
b71b60ee 1196 struct timeval tv={0,0};
1197 HarvestTimestamp(&msgh, &tv);
1198 ComboAddress dest;
1199 memset(&dest, 0, sizeof(dest)); // this makes sure we igore this address if not returned by recvmsg above
1200 HarvestDestinationAddress(&msgh, &dest);
232f0877 1201 if(g_weDistributeQueries)
b71b60ee 1202 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
232f0877 1203 else
b71b60ee 1204 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
5db529f8
BH
1205 }
1206 }
1207 catch(MOADNSException& mde) {
3ddb9247 1208 g_stats.clientParseError++;
84e66a59 1209 if(g_logCommonErrors)
4957a608 1210 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 1211 }
0b602819
KM
1212 catch(std::runtime_error& e) {
1213 g_stats.clientParseError++;
1214 if(g_logCommonErrors)
1215 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
1216 }
1217 }
ac0e821b
BH
1218 else {
1219 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
3ddb9247 1220 if(errno == EAGAIN)
9326cae1 1221 g_stats.noPacketError++;
bf3b0cec 1222 break;
ac0e821b 1223 }
5db529f8
BH
1224}
1225
1bc3c142 1226
5db529f8
BH
1227typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
1228deferredAdd_t deferredAdd;
1229
f28307ad 1230void makeTCPServerSockets()
9c495589 1231{
37d3f960 1232 int fd;
f28307ad 1233 vector<string>locals;
2e3d8a19 1234 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1235
f28307ad 1236 if(locals.empty())
3f81d239 1237 throw PDNSException("No local address specified");
3ddb9247 1238
f28307ad 1239 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1240 ServiceTuple st;
1241 st.port=::arg().asNum("local-port");
1242 parseService(*i, st);
3ddb9247 1243
32252594
BH
1244 ComboAddress sin;
1245
f28307ad 1246 memset((char *)&sin,0, sizeof(sin));
37d3f960 1247 sin.sin4.sin_family = AF_INET;
32252594 1248 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1249 sin.sin6.sin6_family = AF_INET6;
f71bc087 1250 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1251 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1252 }
1253
1254 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 1255 if(fd<0)
3f81d239 1256 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 1257
3897b9e1 1258 setCloseOnExec(fd);
a903b39c 1259
f28307ad 1260 int tmp=1;
37d3f960 1261 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
f28307ad 1262 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1263 exit(1);
f28307ad 1264 }
0dfa94ab 1265 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1266 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1267 }
1268
c8ddb7c2 1269#ifdef TCP_DEFER_ACCEPT
37d3f960
BH
1270 if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
1271 if(i==locals.begin())
4957a608 1272 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1273 }
1274#endif
1275
fec7dd5a
SS
1276 if( ::arg().mustDo("non-local-bind") )
1277 Utility::setBindAny(AF_INET, fd);
1278
32252594 1279 sin.sin4.sin_port = htons(st.port);
37d3f960 1280 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1281 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1282 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 1283
3897b9e1 1284 setNonBlocking(fd);
49a699c4 1285 setSocketSendBuffer(fd, 65000);
37d3f960 1286 listen(fd, 128);
5db529f8 1287 deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1288 g_tcpListenSockets.push_back(fd);
84433b79 1289 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1290 // - fd is not that which we know here, but returned from accept()
3ddb9247 1291 if(sin.sin4.sin_family == AF_INET)
32252594 1292 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1293 else
32252594 1294 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1295 }
9c495589
BH
1296}
1297
f28307ad 1298void makeUDPServerSockets()
288f4aa9 1299{
fec7dd5a 1300 int one=1;
f28307ad 1301 vector<string>locals;
2e3d8a19 1302 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1303
f28307ad 1304 if(locals.empty())
3f81d239 1305 throw PDNSException("No local address specified");
3ddb9247 1306
f28307ad 1307 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1308 ServiceTuple st;
1309 st.port=::arg().asNum("local-port");
1310 parseService(*i, st);
1311
37d3f960 1312 ComboAddress sin;
996c89cc 1313
37d3f960
BH
1314 memset(&sin, 0, sizeof(sin));
1315 sin.sin4.sin_family = AF_INET;
32252594 1316 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1317 sin.sin6.sin6_family = AF_INET6;
f71bc087 1318 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1319 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 1320 }
3ddb9247 1321
bb4bdbaf 1322 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 1323 if(fd < 0) {
3f81d239 1324 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1325 }
915b0c39
AT
1326 if (!setSocketTimestamps(fd))
1327 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 1328
b71b60ee 1329 if(IsAnyAddress(sin)) {
1330 setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems
757d3179 1331#ifdef IPV6_RECVPKTINFO
3ddb9247 1332 setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
757d3179 1333#endif
0dfa94ab 1334 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
1335 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1336 }
b71b60ee 1337 }
37d3f960 1338
fec7dd5a
SS
1339 if( ::arg().mustDo("non-local-bind") )
1340 Utility::setBindAny(AF_INET6, fd);
1341
3897b9e1 1342 setCloseOnExec(fd);
a903b39c 1343
4e9a20e6 1344 setSocketReceiveBuffer(fd, 250000);
32252594 1345 sin.sin4.sin_port = htons(st.port);
37d3f960
BH
1346
1347 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1348 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
3f81d239 1349 throw PDNSException("Resolver binding to server socket on port "+ lexical_cast<string>(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 1350
3897b9e1 1351 setNonBlocking(fd);
c2136bf0 1352
0aaecd50 1353 deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 1354 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 1355 if(sin.sin4.sin_family == AF_INET)
32252594 1356 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1357 else
32252594 1358 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1359 }
c836dc19 1360}
caa6eefa 1361
9c495589 1362
c836dc19
BH
1363void daemonize(void)
1364{
1365 if(fork())
1366 exit(0); // bye bye
3ddb9247
PD
1367
1368 setsid();
c836dc19 1369
27a5ead5 1370 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 1371 if(i < 0)
27a5ead5
BH
1372 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1373 else {
1374 dup2(i,0); /* stdin */
1375 dup2(i,1); /* stderr */
1376 dup2(i,2); /* stderr */
1377 close(i);
1378 }
288f4aa9 1379}
caa6eefa 1380
cc59bce6 1381AtomicCounter counter;
c75a6a9e
BH
1382bool statsWanted;
1383
1384void usr1Handler(int)
1385{
1386 statsWanted=true;
1387}
ae1b2e98 1388
9170fbaf
BH
1389void usr2Handler(int)
1390{
f1f34cc2 1391 g_quiet= !g_quiet;
1392 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
1393 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
1394}
1395
c75a6a9e
BH
1396void doStats(void)
1397{
16beeaa4
BH
1398 static time_t lastOutputTime;
1399 static uint64_t lastQueryCount;
d299d4f5 1400
1401 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
1402 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 1403
d299d4f5 1404 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
3427fa8a
BH
1405 L<<Logger::Warning<<"stats: "<<g_stats.qcounter<<" questions, "<<
1406 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
1407 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
1408 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
1409
3427fa8a
BH
1410 L<<Logger::Warning<<"stats: throttle map: "
1411 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 1412 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
70c2c8b1
BH
1413 L<<Logger::Warning<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1414 L<<Logger::Warning<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 1415 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
3427fa8a
BH
1416 L<<Logger::Warning<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
1417 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 1418
16beeaa4
BH
1419 //L<<Logger::Warning<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
1420 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 1421
16beeaa4
BH
1422 L<<Logger::Warning<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
1423 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 1424
16beeaa4
BH
1425 time_t now = time(0);
1426 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
1427 L<<Logger::Warning<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
1428 }
1429 lastOutputTime = now;
1430 lastQueryCount = SyncRes::s_queries;
c75a6a9e 1431 }
3ddb9247 1432 else if(statsWanted)
70c2c8b1 1433 L<<Logger::Warning<<"stats: no stats yet!"<<endl;
7becf07f 1434
c75a6a9e
BH
1435 statsWanted=false;
1436}
c836dc19 1437
29f0b1ce 1438static void houseKeeping(void *)
c836dc19 1439{
d67620e4 1440 static __thread time_t last_stat, last_rootupdate, last_prune, last_secpoll;
8baca3fa 1441 static __thread int cleanCounter=0;
cc59bce6 1442 static __thread bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
1443 try {
1444 if(s_running)
1445 return;
1446 s_running=true;
3ddb9247 1447
cc59bce6 1448 struct timeval now;
1449 Utility::gettimeofday(&now, 0);
3ddb9247
PD
1450
1451 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 1452 DTime dt;
1453 dt.setTimeval(now);
1454 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
f8f243b0 1455 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numWorkerThreads);
3ddb9247 1456
f8f243b0 1457 pruneCollection(t_sstorage->negcache, ::arg().asNum("max-cache-entries") / (g_numWorkerThreads * 10), 200);
3ddb9247 1458
cc59bce6 1459 if(!((cleanCounter++)%40)) { // this is a full scan!
1460 time_t limit=now.tv_sec-300;
1461 for(SyncRes::nsspeeds_t::iterator i = t_sstorage->nsSpeeds.begin() ; i!= t_sstorage->nsSpeeds.end(); )
1462 if(i->second.stale(limit))
1463 t_sstorage->nsSpeeds.erase(i++);
1464 else
1465 ++i;
1466 }
1467 last_prune=time(0);
d67620e4 1468 }
3ddb9247 1469
cc59bce6 1470 if(now.tv_sec - last_rootupdate > 7200) {
1471 SyncRes sr(now);
1472 sr.setDoEDNS0(true);
e325f20c 1473 vector<DNSRecord> ret;
3ddb9247 1474
cc59bce6 1475 sr.setNoCache();
1476 int res=-1;
18b73338 1477 try {
6ed9a611 1478 res=sr.beginResolve(DNSName("."), QType(QType::NS), 1, ret);
cc59bce6 1479 }
3aa91c3e 1480 catch(PDNSException& e)
1481 {
1482 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
1483 }
1484
1485 catch(std::exception& e)
1486 {
1487 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
1488 }
1489
cc59bce6 1490 catch(...)
1491 {
1492 L<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
1493 }
1494 if(!res) {
1495 L<<Logger::Notice<<"Refreshed . records"<<endl;
1496 last_rootupdate=now.tv_sec;
1497 }
1498 else
1499 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1500 }
3ddb9247 1501
cc59bce6 1502 if(!t_id) {
3ddb9247 1503 if(now.tv_sec - last_stat >= 1800) {
cc59bce6 1504 doStats();
1505 last_stat=time(0);
1506 }
3ddb9247 1507
cc59bce6 1508 if(now.tv_sec - last_secpoll >= 3600) {
1509 try {
1510 doSecPoll(&last_secpoll);
1511 }
1512 catch(...) {}
18b73338 1513 }
d67620e4 1514 }
cc59bce6 1515 s_running=false;
d67620e4 1516 }
cc59bce6 1517 catch(PDNSException& ae)
1518 {
1519 s_running=false;
1520 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
1521 throw;
1522 }
779828c4 1523}
d6d5dea7 1524
49a699c4
BH
1525void makeThreadPipes()
1526{
c3828c03 1527 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
1528 struct ThreadPipeSet tps;
1529 int fd[2];
1530 if(pipe(fd) < 0)
1531 unixDie("Creating pipe for inter-thread communications");
3ddb9247 1532
49a699c4
BH
1533 tps.readToThread = fd[0];
1534 tps.writeToThread = fd[1];
3ddb9247 1535
49a699c4
BH
1536 if(pipe(fd) < 0)
1537 unixDie("Creating pipe for inter-thread communications");
1538 tps.readFromThread = fd[0];
1539 tps.writeFromThread = fd[1];
3ddb9247 1540
49a699c4
BH
1541 g_pipes.push_back(tps);
1542 }
1543}
1544
00c9b8c1
BH
1545struct ThreadMSG
1546{
1547 pipefunc_t func;
1548 bool wantAnswer;
1549};
1550
49a699c4
BH
1551void broadcastFunction(const pipefunc_t& func, bool skipSelf)
1552{
49a699c4 1553 unsigned int n = 0;
1dc8f4d0 1554 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
1555 {
1556 if(n++ == t_id) {
1557 if(!skipSelf)
1558 func(); // don't write to ourselves!
1559 continue;
1560 }
3ddb9247 1561
00c9b8c1
BH
1562 ThreadMSG* tmsg = new ThreadMSG();
1563 tmsg->func = func;
1564 tmsg->wantAnswer = true;
1565 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
49a699c4 1566 unixDie("write to thread pipe returned wrong size or error");
3ddb9247 1567
49a699c4
BH
1568 string* resp;
1569 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1570 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 1571
49a699c4
BH
1572 if(resp) {
1573// cerr <<"got response: " << *resp << endl;
1574 delete resp;
1575 }
1576 }
1577}
06ea9015 1578
2fafb640 1579static uint32_t g_disthashseed;
8171ab83 1580void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 1581{
8171ab83 1582 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 1583 unsigned int target = 1 + (hash % (g_pipes.size()-1));
1584
00c9b8c1
BH
1585 if(target == t_id) {
1586 func();
1587 return;
1588 }
3ddb9247 1589 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
1590 ThreadMSG* tmsg = new ThreadMSG();
1591 tmsg->func = func;
1592 tmsg->wantAnswer = false;
3ddb9247 1593
00c9b8c1 1594 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
3ddb9247 1595 unixDie("write to thread pipe returned wrong size or error");
00c9b8c1 1596}
3427fa8a 1597
49a699c4
BH
1598void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
1599{
00c9b8c1 1600 ThreadMSG* tmsg;
3ddb9247
PD
1601
1602 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
1603 unixDie("read from thread pipe returned wrong size or error");
1604 }
3ddb9247 1605
2f22827a 1606 void *resp=0;
1607 try {
1608 resp = tmsg->func();
1609 }
1610 catch(std::exception& e) {
1611 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
1612 }
1613 catch(PDNSException& e) {
1614 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
1615 }
00c9b8c1
BH
1616 if(tmsg->wantAnswer)
1617 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp))
1618 unixDie("write to thread pipe returned wrong size or error");
3ddb9247 1619
00c9b8c1 1620 delete tmsg;
49a699c4 1621}
09e6702a 1622
13034931
BH
1623template<class T> void *voider(const boost::function<T*()>& func)
1624{
1625 return func();
1626}
1627
b3b5459d
BH
1628vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
1629{
1630 a.insert(a.end(), b.begin(), b.end());
1631 return a;
1632}
1633
/** Append every (name, type) pair of b to a; returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  vector<pair<string, uint16_t> >::iterator tail = a.end();
  a.insert(tail, b.begin(), b.end());
  return a;
}
1639
3ddb9247
PD
1640vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
1641{
1642 a.insert(a.end(), b.begin(), b.end());
1643 return a;
1644}
1645
92011b8f 1646
13034931 1647template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
1648{
1649 unsigned int n = 0;
1650 T ret=T();
1dc8f4d0 1651 for(ThreadPipeSet& tps : g_pipes)
3427fa8a
BH
1652 {
1653 if(n++ == t_id) {
1654 if(!skipSelf) {
1655 T* resp = (T*)func(); // don't write to ourselves!
1656 if(resp) {
1657 //~ cerr <<"got direct: " << *resp << endl;
1658 ret += *resp;
1659 delete resp;
1660 }
1661 }
1662 continue;
1663 }
3ddb9247 1664
00c9b8c1
BH
1665 ThreadMSG* tmsg = new ThreadMSG();
1666 tmsg->func = boost::bind(voider<T>, func);
1667 tmsg->wantAnswer = true;
3ddb9247 1668
00c9b8c1 1669 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
3427fa8a 1670 unixDie("write to thread pipe returned wrong size or error");
3ddb9247
PD
1671
1672
3427fa8a
BH
1673 T* resp;
1674 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1675 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 1676
3427fa8a
BH
1677 if(resp) {
1678 //~ cerr <<"got response: " << *resp << endl;
1679 ret += *resp;
1680 delete resp;
1681 }
1682 }
1683 return ret;
1684}
1685
13034931
BH
1686template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
1687template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
b3b5459d 1688template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
3ddb9247 1689template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 1690
d8f6d49f 1691void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
1692{
1693 string remote;
1694 string msg=s_rcc.recv(&remote);
1695 RecursorControlParser rcp;
1696 RecursorControlParser::func_t* command;
3ddb9247 1697
09e6702a 1698 string answer=rcp.getAnswer(msg, &command);
ab5c053d
BH
1699 try {
1700 s_rcc.send(answer, &remote);
1701 command();
1702 }
fdbf35ac 1703 catch(std::exception& e) {
ab5c053d
BH
1704 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
1705 }
3f81d239 1706 catch(PDNSException& ae) {
ab5c053d
BH
1707 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
1708 }
09e6702a
BH
1709}
1710
d8f6d49f 1711void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1712{
0b18b22e 1713 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 1714 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 1715
667f7e60 1716 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 1717
705f31ae 1718 int ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 1719 if(ret > 0) {
667f7e60
BH
1720 pident->inMSG.append(&buffer[0], &buffer[ret]);
1721 pident->inNeeded-=ret;
825fa717 1722 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
1723 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
1724 PacketID pid=*pident;
1725 string msg=pident->inMSG;
3ddb9247 1726
bb4bdbaf 1727 t_fdm->removeReadFD(fd);
3ddb9247 1728 MT->sendEvent(pid, &msg);
09e6702a
BH
1729 }
1730 else {
667f7e60 1731 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
1732 }
1733 }
1734 else {
667f7e60 1735 PacketID tmp=*pident;
bb4bdbaf 1736 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
1737 string empty;
1738 MT->sendEvent(tmp, &empty); // this conveys error status
1739 }
1740}
1741
d8f6d49f 1742void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1743{
0b18b22e 1744 PacketID* pid=any_cast<PacketID>(&var);
4ca15bca 1745 int ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 1746 if(ret > 0) {
667f7e60
BH
1747 pid->outPos+=ret;
1748 if(pid->outPos==pid->outMSG.size()) {
1749 PacketID tmp=*pid;
bb4bdbaf 1750 t_fdm->removeWriteFD(fd);
09e6702a
BH
1751 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
1752 }
1753 }
1754 else { // error or EOF
667f7e60 1755 PacketID tmp(*pid);
bb4bdbaf 1756 t_fdm->removeWriteFD(fd);
09e6702a 1757 string sent;
998a4334 1758 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
1759 }
1760}
1761
34801ab1
BH
1762// resend event to everybody chained onto it
1763void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
1764{
1765 if(iter->key.chain.empty())
1766 return;
e27e91a8 1767 // cerr<<"doResends called!\n";
34801ab1
BH
1768 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
1769 resend.fd=-1;
1770 resend.id=*i;
e27e91a8 1771 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 1772
34801ab1
BH
1773 MT->sendEvent(resend, &content);
1774 g_stats.chainResends++;
34801ab1
BH
1775 }
1776}
1777
d8f6d49f 1778void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1779{
600fc20b 1780 PacketID pid=any_cast<PacketID>(var);
998a4334 1781 int len;
09e6702a 1782 char data[1500];
996c89cc 1783 ComboAddress fromaddr;
09e6702a
BH
1784 socklen_t addrlen=sizeof(fromaddr);
1785
998a4334 1786 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 1787
998a4334
BH
1788 if(len < (int)sizeof(dnsheader)) {
1789 if(len < 0)
996c89cc 1790 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 1791 else {
3ddb9247 1792 g_stats.serverParseError++;
09e6702a 1793 if(g_logCommonErrors)
85db02c5 1794 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 1795 ": packet smaller than DNS header"<<endl;
998a4334 1796 }
34801ab1 1797
49a699c4 1798 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
1799 string empty;
1800
1801 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 1802 if(iter != MT->d_waiters.end())
34801ab1 1803 doResends(iter, pid, empty);
3ddb9247 1804
34801ab1 1805 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 1806 return;
3ddb9247 1807 }
998a4334
BH
1808
1809 dnsheader dh;
1810 memcpy(&dh, data, sizeof(dh));
3ddb9247 1811
6da3b3ad
PD
1812 PacketID pident;
1813 pident.remote=fromaddr;
1814 pident.id=dh.id;
1815 pident.fd=fd;
34801ab1 1816
33a928af 1817 if(!dh.qr && g_logCommonErrors) {
854d44e3 1818 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
1819 }
1820
1821 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
1822 !dh.qr) { // one weird server
1823 pident.domain.clear();
1824 pident.type = 0;
1825 }
1826 else {
1827 try {
8171ab83 1828 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
1829 }
1830 catch(std::exception& e) {
1831 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
1832 L<<Logger::Warning<<"Error in packet from "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
1833 return;
34801ab1 1834 }
6da3b3ad
PD
1835 }
1836 string packet;
1837 packet.assign(data, len);
34801ab1 1838
6da3b3ad
PD
1839 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
1840 if(iter != MT->d_waiters.end()) {
1841 doResends(iter, pident, packet);
1842 }
c1da7976 1843
6da3b3ad 1844retryWithName:
4957a608 1845
6da3b3ad
PD
1846 if(!MT->sendEvent(pident, &packet)) {
1847 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
1848 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
1849 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 1850 pident.domain == mthread->key.domain) {
6da3b3ad 1851 mthread->key.nearMisses++;
998a4334 1852 }
6da3b3ad
PD
1853
1854 // be a bit paranoid here since we're weakening our matching
3ddb9247 1855 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
1856 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
1857 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
1858 pident.domain = mthread->key.domain;
1859 pident.type = mthread->key.type;
1860 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 1861 }
09e6702a 1862 }
6da3b3ad
PD
1863 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
1864 if(g_logCommonErrors) {
1865 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<<pident.domain<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 1866 }
09e6702a 1867 }
6da3b3ad
PD
1868 else if(fd >= 0) {
1869 t_udpclientsocks->returnSocket(fd);
1870 }
09e6702a
BH
1871}
1872
1f4abb20
BH
1873FDMultiplexer* getMultiplexer()
1874{
1875 FDMultiplexer* ret;
1876 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
1877 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
1878 try {
1879 ret=i->second();
1f4abb20
BH
1880 return ret;
1881 }
98d0ee4a 1882 catch(FDMultiplexerException &fe) {
0a7f24cb 1883 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
1884 }
1885 catch(...) {
1886 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
1887 }
1f4abb20
BH
1888 }
1889 L<<Logger::Error<<"No working multiplexer found!"<<endl;
1890 exit(1);
1891}
1892
3ddb9247 1893
0f39c1a3 1894string* doReloadLuaScript()
4485aa35 1895{
674cf0f6 1896 string fname= ::arg()["lua-dns-script"];
4485aa35 1897 try {
674cf0f6
BH
1898 if(fname.empty()) {
1899 t_pdl->reset();
1900 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 1901 return new string("unloaded\n");
4485aa35
BH
1902 }
1903 else {
5704e107 1904 *t_pdl = shared_ptr<RecursorLua>(new RecursorLua(fname));
4485aa35
BH
1905 }
1906 }
fdbf35ac 1907 catch(std::exception& e) {
674cf0f6 1908 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 1909 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 1910 }
3ddb9247 1911
674cf0f6 1912 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 1913 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
1914}
1915
49a699c4
BH
1916string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
1917{
3ddb9247 1918 if(begin != end)
49a699c4 1919 ::arg().set("lua-dns-script") = *begin;
3ddb9247 1920
0f39c1a3 1921 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 1922}
49a699c4 1923
77499b05
BH
1924string* pleaseUseNewTraceRegex(const std::string& newRegex)
1925try
1926{
1927 if(newRegex.empty()) {
1928 t_traceRegex->reset();
1929 return new string("unset\n");
1930 }
1931 else {
1932 (*t_traceRegex) = shared_ptr<Regex>(new Regex(newRegex));
1933 return new string("ok\n");
1934 }
1935}
3f81d239 1936catch(PDNSException& ae)
77499b05
BH
1937{
1938 return new string(ae.reason+"\n");
1939}
1940
1941string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
1942{
1943 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
1944}
1945
4e9a20e6 1946static void checkLinuxIPv6Limits()
1947{
1948#ifdef __linux__
1949 string line;
1950 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
1951 int lim=atoi(line.c_str());
1952 if(lim < 16384) {
36849ff2 1953 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 1954 }
1955 }
1956#endif
1957}
36849ff2 1958static void checkOrFixFDS()
4e9a20e6 1959{
f8f243b0 1960 unsigned int availFDs=getFilenumLimit()-10; // some healthy margin, thanks AJ ;-)
1961 if(g_maxMThreads * g_numWorkerThreads > availFDs) {
1962 if(getFilenumLimit(true) >= g_maxMThreads * g_numWorkerThreads) {
1963 setFilenumLimit(g_maxMThreads * g_numWorkerThreads);
1964 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<g_maxMThreads * g_numWorkerThreads<<" to match max-mthreads and threads settings"<<endl;
36849ff2 1965 }
1966 else {
f8f243b0 1967 int newval = getFilenumLimit(true) / g_numWorkerThreads;
1968 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<availFDs<<" < "<<g_maxMThreads*g_numWorkerThreads<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 1969 g_maxMThreads = newval;
f8f243b0 1970 setFilenumLimit(g_maxMThreads * g_numWorkerThreads);
36849ff2 1971 }
1972 }
4e9a20e6 1973}
77499b05 1974
bb4bdbaf 1975void* recursorThread(void*);
51e2144e 1976
3427fa8a 1977void* pleaseSupplantACLs(NetmaskGroup *ng)
49a699c4
BH
1978{
1979 t_allowFrom = ng;
3427fa8a 1980 return 0;
49a699c4
BH
1981}
1982
dbd23fc2
BH
1983int g_argc;
1984char** g_argv;
1985
18af64a8 1986void parseACLs()
f7c1d4e3 1987{
18af64a8 1988 static bool l_initialized;
3ddb9247 1989
49a699c4 1990 if(l_initialized) { // only reload configuration file on second call
18af64a8
BH
1991 string configname=::arg()["config-dir"]+"/recursor.conf";
1992 cleanSlashes(configname);
3ddb9247
PD
1993
1994 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 1995 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 1996 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 1997 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
1998 ::arg().preParse(g_argc, g_argv, "include-dir");
1999
2000 // then process includes
2001 std::vector<std::string> extraConfigs;
242b90e1
AT
2002 ::arg().gatherIncludes(extraConfigs);
2003
1dc8f4d0 2004 for(const std::string& fn : extraConfigs) {
7e818521 2005 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2006 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2007 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2008 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2009 }
ca2c884c
AT
2010
2011 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2012 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2013 }
49a699c4
BH
2014
2015 NetmaskGroup* oldAllowFrom = t_allowFrom, *allowFrom=new NetmaskGroup;
3ddb9247 2016
2c95fc65
BH
2017 if(!::arg()["allow-from-file"].empty()) {
2018 string line;
2c95fc65
BH
2019 ifstream ifs(::arg()["allow-from-file"].c_str());
2020 if(!ifs) {
3ddb9247 2021 delete allowFrom;
9c61b9d0 2022 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2023 }
2024
2025 string::size_type pos;
2026 while(getline(ifs,line)) {
2027 pos=line.find('#');
2028 if(pos!=string::npos)
2029 line.resize(pos);
2030 trim(line);
2031 if(line.empty())
2032 continue;
2033
18af64a8 2034 allowFrom->addMask(line);
2c95fc65 2035 }
49a699c4 2036 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2037 }
2038 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2039 vector<string> ips;
2040 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2041
f7c1d4e3
BH
2042 L<<Logger::Warning<<"Only allowing queries from: ";
2043 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2044 allowFrom->addMask(*i);
f7c1d4e3 2045 if(i!=ips.begin())
674cf0f6 2046 L<<Logger::Warning<<", ";
f7c1d4e3
BH
2047 L<<Logger::Warning<<*i;
2048 }
2049 L<<Logger::Warning<<endl;
2050 }
49a699c4 2051 else {
3ddb9247 2052 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
49a699c4
BH
2053 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2054 delete allowFrom;
2055 allowFrom = 0;
2056 }
3ddb9247 2057
49a699c4 2058 g_initialAllowFrom = allowFrom;
d7dae798 2059 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
49a699c4 2060 delete oldAllowFrom;
3ddb9247 2061
49a699c4 2062 l_initialized = true;
18af64a8
BH
2063}
2064
795215f2 2065boost::optional<Netmask> getEDNSSubnetMask(const ComboAddress& local, const DNSName&dn, const ComboAddress& rem)
2066{
2067 if(local.sin4.sin_family != AF_INET || local.sin4.sin_addr.s_addr) { // detect unset 'requestor'
2068 if(g_ednsdomains.check(dn) || g_ednssubnets.match(rem)) {
2069 int bits =local.sin4.sin_family == AF_INET ? 24 : 64;
2070 ComboAddress trunc(local);
2071 trunc.truncate(bits);
2072 return boost::optional<Netmask>(Netmask(trunc, bits));
2073 }
2074 }
2075 return boost::optional<Netmask>();
2076}
2077
2078void parseEDNSSubnetWhitelist(const std::string& wlist)
2079{
2080 vector<string> parts;
2081 stringtok(parts, wlist, ",;");
2082 for(const auto& a : parts) {
2083 try {
2084 Netmask nm(a);
2085 g_ednssubnets.addMask(nm);
2086 }
2087 catch(...) {
2088 g_ednsdomains.add(DNSName(a));
2089 }
2090 }
2091}
2092
756e82cf 2093SuffixMatchNode g_delegationOnly;
2094static void setupDelegationOnly()
2095{
2096 vector<string> parts;
2097 stringtok(parts, ::arg()["delegation-only"], ", \t");
2098 for(const auto& p : parts) {
2099 g_delegationOnly.add(DNSName(p));
2100 }
2101}
795215f2 2102
18af64a8
BH
2103int serviceMain(int argc, char*argv[])
2104{
5124de27 2105 L.setName(s_programname);
18af64a8
BH
2106 L.setLoglevel((Logger::Urgency)(6)); // info and up
2107
2108 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
2109 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2110 if(val >= 0)
2111 theL().setFacility(val);
18af64a8
BH
2112 else
2113 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2114 }
2115
ba1a571d 2116 showProductVersion();
18af64a8 2117 seedRandom(::arg()["entropy-source"]);
06ea9015 2118 g_disthashseed=dns_random(0xffffffff);
2119
18af64a8 2120 parseACLs();
92011b8f 2121 sortPublicSuffixList();
2122
eb5bae86
BH
2123 if(!::arg()["dont-query"].empty()) {
2124 g_dontQuery=new NetmaskGroup;
2125 vector<string> ips;
2126 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
2127 ips.push_back("0.0.0.0");
2128 ips.push_back("::");
c36bc97a 2129
eb5bae86
BH
2130 L<<Logger::Warning<<"Will not send queries to: ";
2131 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2132 g_dontQuery->addMask(*i);
2133 if(i!=ips.begin())
4957a608 2134 L<<Logger::Warning<<", ";
eb5bae86
BH
2135 L<<Logger::Warning<<*i;
2136 }
2137 L<<Logger::Warning<<endl;
2138 }
2139
f7c1d4e3 2140 g_quiet=::arg().mustDo("quiet");
3ddb9247 2141
1bc3c142
BH
2142 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2143 if(g_weDistributeQueries) {
2144 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2145 }
3ddb9247 2146
756e82cf 2147 setupDelegationOnly();
2148
2149
77499b05
BH
2150 if(::arg()["trace"]=="fail") {
2151 SyncRes::setDefaultLogMode(SyncRes::Store);
2152 }
2153 else if(::arg().mustDo("trace")) {
2154 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
2155 ::arg().set("quiet")="no";
2156 g_quiet=false;
2157 }
3ddb9247 2158
aadceba8 2159 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2160
4e9a20e6 2161 checkLinuxIPv6Limits();
5a38281c 2162 try {
3ddb9247 2163 vector<string> addrs;
5a38281c
BH
2164 if(!::arg()["query-local-address6"].empty()) {
2165 SyncRes::s_doIPv6=true;
d4fb76e9 2166 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3ddb9247 2167
5a38281c 2168 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
1dc8f4d0 2169 for(const string& addr : addrs) {
4957a608 2170 g_localQueryAddresses6.push_back(ComboAddress(addr));
5a38281c
BH
2171 }
2172 }
d4fb76e9
BH
2173 else {
2174 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2175 }
5a38281c
BH
2176 addrs.clear();
2177 stringtok(addrs, ::arg()["query-local-address"], ", ;");
1dc8f4d0 2178 for(const string& addr : addrs) {
5a38281c
BH
2179 g_localQueryAddresses4.push_back(ComboAddress(addr));
2180 }
2181 }
2182 catch(std::exception& e) {
2183 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2184 exit(99);
f7c1d4e3 2185 }
f555e92e 2186
1051f8a9
BH
2187 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2188
f7c1d4e3 2189 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
c3e753c7 2190 SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
1051f8a9
BH
2191 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2192 SyncRes::s_packetcacheservfailttl=::arg().asNum("packetcache-servfail-ttl");
628e2c7b
PA
2193 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2194 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 2195 SyncRes::s_serverID=::arg()["server-id"];
173d790e 2196 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 2197 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
01402d56 2198 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
2199 if(SyncRes::s_serverID.empty()) {
2200 char tmp[128];
2201 gethostname(tmp, sizeof(tmp)-1);
2202 SyncRes::s_serverID=tmp;
2203 }
3ddb9247 2204
5b0ddd18 2205 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 2206
49a699c4 2207 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 2208
08f3f638 2209 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 2210
f7c1d4e3 2211 g_logCommonErrors=::arg().mustDo("log-common-errors");
e661a20b
PD
2212
2213 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
2214 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2215
f7c1d4e3
BH
2216 makeUDPServerSockets();
2217 makeTCPServerSockets();
815099b2 2218
376effcf 2219 parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
2220
677e2a46
BH
2221 int forks;
2222 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
2223 if(!fork()) // we are child
2224 break;
2225 }
3ddb9247 2226
2d733c0f 2227 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
815099b2 2228 if(!s_pidfname.empty())
3ddb9247
PD
2229 unlink(s_pidfname.c_str()); // remove possible old pid file
2230
644dd1da 2231 loadRPZFiles();
2232
f7c1d4e3
BH
2233 if(::arg().mustDo("daemon")) {
2234 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
2235 L.toConsole(Logger::Critical);
f7c1d4e3
BH
2236 daemonize();
2237 }
2238 signal(SIGUSR1,usr1Handler);
2239 signal(SIGUSR2,usr2Handler);
2240 signal(SIGPIPE,SIG_IGN);
2241 writePid();
677e2a46 2242 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
a6414fdc
AT
2243 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
2244 g_maxMThreads = ::arg().asNum("max-mthreads");
2245 checkOrFixFDS();
3ddb9247 2246
644dd1da 2247
2248
138435cb
BH
2249 int newgid=0;
2250 if(!::arg()["setgid"].empty())
2251 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
2252 int newuid=0;
2253 if(!::arg()["setuid"].empty())
2254 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
2255
f1d6a7ce
KM
2256 Utility::dropGroupPrivs(newuid, newgid);
2257
138435cb
BH
2258 if (!::arg()["chroot"].empty()) {
2259 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
2260 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
2261 exit(1);
2262 }
2263 }
2264
f1d6a7ce 2265 Utility::dropUserPrivs(newuid);
f8f243b0 2266 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
2267 g_numWorkerThreads = ::arg().asNum("threads");
49a699c4 2268 makeThreadPipes();
3ddb9247 2269
5d4dd7fe
BH
2270 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2271 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
343257a4 2272
c3828c03 2273 if(g_numThreads == 1) {
76698c6e 2274 L<<Logger::Warning<<"Operating unthreaded"<<endl;
76698c6e
BH
2275 recursorThread(0);
2276 }
2277 else {
2278 pthread_t tid;
c3828c03
BH
2279 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
2280 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 2281 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
76698c6e
BH
2282 }
2283 void* res;
49a699c4 2284
3ddb9247 2285
76698c6e 2286 pthread_join(tid, &res);
bb4bdbaf 2287 }
bb4bdbaf
BH
2288 return 0;
2289}
2290
2291void* recursorThread(void* ptr)
2292try
2293{
2e2cd8ec 2294 t_id=(int) (long) ptr;
49a699c4 2295 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
ac0e821b 2296 t_sstorage->domainmap = g_initialDomainMap;
49a699c4
BH
2297 t_allowFrom = g_initialAllowFrom;
2298 t_udpclientsocks = new UDPClientSocks();
bd0289fc 2299 t_tcpClientCounts = new tcpClientCounts_t();
49a699c4 2300 primeHints();
3ddb9247 2301
49a699c4 2302 t_packetCache = new RecursorPacketCache();
3ddb9247 2303
49a699c4 2304 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 2305
5704e107 2306 t_pdl = new shared_ptr<RecursorLua>();
3ddb9247 2307
674cf0f6
BH
2308 try {
2309 if(!::arg()["lua-dns-script"].empty()) {
5704e107 2310 *t_pdl = shared_ptr<RecursorLua>(new RecursorLua(::arg()["lua-dns-script"]));
674cf0f6
BH
2311 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
2312 }
674cf0f6
BH
2313 }
2314 catch(std::exception &e) {
2315 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 2316 _exit(99);
674cf0f6 2317 }
3ddb9247 2318
77499b05 2319 t_traceRegex = new shared_ptr<Regex>();
f8f243b0 2320 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 2321 if(ringsize) {
60c8afa8 2322 t_remotes = new addrringbuf_t();
f8f243b0 2323 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 2324 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 2325 else
3ddb9247 2326 t_remotes->set_capacity(ringsize);
60c8afa8 2327 t_servfailremotes = new addrringbuf_t();
3ddb9247 2328 t_servfailremotes->set_capacity(ringsize);
60c8afa8 2329 t_largeanswerremotes = new addrringbuf_t();
3ddb9247 2330 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 2331
c5c066bf 2332 t_queryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2333 t_queryring->set_capacity(ringsize);
c5c066bf 2334 t_servfailqueryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2335 t_servfailqueryring->set_capacity(ringsize);
92011b8f 2336 }
3ddb9247 2337
bb4bdbaf 2338 MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
3ddb9247 2339
bb4bdbaf
BH
2340 PacketID pident;
2341
2342 t_fdm=getMultiplexer();
f3d1d67b 2343 if(!t_id) {
30a1aa92 2344 if(::arg().mustDo("experimental-webserver")) {
2345 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 2346 try {
1ce57618 2347 new RecursorWebServer(t_fdm);
8989097d
CH
2348 }
2349 catch(PDNSException &e) {
2350 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
2351 exit(99);
2352 }
f3d1d67b 2353 }
83252304 2354 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 2355 }
83252304 2356
49a699c4 2357 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 2358
1bc3c142 2359 if(!g_weDistributeQueries || !t_id) // if we distribute queries, only t_id = 0 listens
3ddb9247 2360 for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i)
1bc3c142 2361 t_fdm->addReadFD(i->first, i->second);
3ddb9247 2362
674cf0f6 2363 if(!t_id) {
674cf0f6
BH
2364 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
2365 }
1bc3c142 2366
f7c1d4e3 2367 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 2368
f7c1d4e3 2369 bool listenOnTCP(true);
49a699c4 2370
2c78bd57 2371 time_t last_carbon=0;
2372 time_t carbonInterval=::arg().asNum("carbon-interval");
cc59bce6 2373 counter=AtomicCounter(0); // used to periodically execute certain tasks
f7c1d4e3 2374 for(;;) {
ac0e821b 2375 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 2376
3427fa8a
BH
2377 if(!(counter%500)) {
2378 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
2379 }
2380
d2392145 2381 if(!(counter%55)) {
d8f6d49f 2382 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 2383 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 2384
f7c1d4e3 2385 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 2386 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 2387 if(g_logCommonErrors)
cd989c87 2388 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 2389 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
2390 }
2391 }
3ddb9247 2392
f7c1d4e3
BH
2393 counter++;
2394
3427fa8a 2395 if(!t_id && statsWanted) {
f7c1d4e3
BH
2396 doStats();
2397 }
2398
2399 Utility::gettimeofday(&g_now, 0);
2c78bd57 2400
2401 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
2402 MT->makeThread(doCarbonDump, 0);
2403 last_carbon = g_now.tv_sec;
2404 }
2405
bb4bdbaf 2406 t_fdm->run(&g_now);
3ea54bf0 2407 // 'run' updates g_now for us
f7c1d4e3 2408
b8ef5c5c 2409 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 2410 if(listenOnTCP) {
2411 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
2412 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2413 t_fdm->removeReadFD(*i);
2414 listenOnTCP=false;
2415 }
f7c1d4e3 2416 }
5c889cf5 2417 else {
2418 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
2419 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2420 t_fdm->addReadFD(*i, handleNewTCPQuestion);
2421 listenOnTCP=true;
2422 }
f7c1d4e3
BH
2423 }
2424 }
2425 }
2426}
3f81d239 2427catch(PDNSException &ae) {
bb4bdbaf
BH
2428 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2429 return 0;
2430}
2431catch(std::exception &e) {
2432 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2433 return 0;
2434}
2435catch(...) {
2436 L<<Logger::Error<<"any other exception in main: "<<endl;
2437 return 0;
2438}
2439
51e2144e 2440
3ddb9247 2441int main(int argc, char **argv)
288f4aa9 2442{
dbd23fc2
BH
2443 g_argc = argc;
2444 g_argv = argv;
5e3de507 2445 g_stats.startupTime=time(0);
3e135495 2446 versionSetProduct(ProductRecursor);
8a63d3ce 2447 reportBasicTypes();
0007c2e5 2448 reportOtherTypes();
ea634573 2449
22030c37 2450 int ret = EXIT_SUCCESS;
caa6eefa 2451
288f4aa9 2452 try {
f888311c 2453 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 2454 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 2455 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 2456 ::arg().set("local-port","port to listen on")="53";
32252594 2457 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 2458 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 2459 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
2e3d8a19 2460 ::arg().set("daemon","Operate as a daemon")="yes";
191f2e47 2461 ::arg().setSwitch("write-pid","Write a PID file")="yes";
34162f8f 2462 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="4";
0e9d9ce2 2463 ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2e3d8a19
BH
2464 ::arg().set("chroot","switch to chroot jail")="";
2465 ::arg().set("setgid","If set, change group id to this gid for more security")="";
2466 ::arg().set("setuid","If set, change user id to this uid for more security")="";
5b0ddd18 2467 ::arg().set("network-timeout", "Wait this nummer of milliseconds for network i/o")="1500";
bb4bdbaf 2468 ::arg().set("threads", "Launch this number of threads")="2";
1bc3c142 2469 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1";
5124de27 2470 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3ddb9247 2471 ::arg().set( "experimental-logfile", "Filename of the log file for JSON parser" )= "/var/log/pdns.log";
88d77d73
CH
2472 ::arg().setSwitch("experimental-webserver", "Start a webserver for monitoring") = "no";
2473 ::arg().set("experimental-webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
2474 ::arg().set("experimental-webserver-port", "Port of webserver to listen on") = "8082";
2475 ::arg().set("experimental-webserver-password", "Password required for accessing the webserver") = "";
69e7f117 2476 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
c348c0c8 2477 ::arg().set("experimental-api-config-dir", "Directory where REST API stores config and zones") = "";
bbef8f04 2478 ::arg().set("experimental-api-key", "REST API Static authentication key (required for API use)") = "";
cc08b5a9 2479 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
2c78bd57 2480 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server")="";
2481 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
b4ae7322 2482 ::arg().set("experimental-api-readonly", "If the JSON API should disallow data modification") = "no";
c038218b 2483 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 2484 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 2485 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
2486 ::arg().set("socket-owner","Owner of socket")="";
2487 ::arg().set("socket-group","Group of socket")="";
2488 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 2489
2e3d8a19
BH
2490 ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
2491 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
2492 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 2493 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 2494 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 2495 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 2496 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 2497 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 2498 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 2499 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 2500 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 2501 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 2502 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 2503 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 2504 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 2505 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
7f7b8d55 2506 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
92011b8f 2507 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 2508 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 2509 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 2510 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 2511 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 2512 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 2513 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
0d5f0a9f 2514 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 2515 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 2516 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
644dd1da 2517 ::arg().set("rpz-files", "RPZ files to load in order, domain or domain=policy pairs separated by commas")="";
39ec5d29 2518 ::arg().set("rpz-masters", "RPZ master servers, address:name pairs separated by commas")="";
644dd1da 2519
5605c067 2520 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
2521 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
2522 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 2523 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 2524 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 2525 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
9bc8c14c 2526 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
4485aa35 2527 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 2528 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247
PD
2529// ::arg().setSwitch( "disable-edns-ping", "Disable EDNSPing - EXPERIMENTAL, LEAVE DISABLED" )= "no";
2530 ::arg().setSwitch( "disable-edns", "Disable EDNS - EXPERIMENTAL, LEAVE DISABLED" )= "";
2531 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
376effcf 2532 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="powerdns.com,82.94.213.34,2001:888:2000:1d::2";
966d3ba8 2533 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="";
cd6310a8 2534 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="no";
e661a20b 2535 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
a09a8ce0 2536 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
aadceba8 2537 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 2538 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 2539 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
a09a8ce0 2540
68e6df3c 2541 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 2542 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2e3d8a19
BH
2543
2544 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 2545 ::arg().setCmd("version","Print version string");
d5141417 2546 ::arg().setCmd("config","Output blank configuration");
f27e6356 2547 L.toConsole(Logger::Info);
2e3d8a19 2548 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 2549
2d733c0f
CH
2550 string configname=::arg()["config-dir"]+"/recursor.conf";
2551 if(::arg()["config-name"]!="") {
2552 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 2553 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
2554 }
2555 cleanSlashes(configname);
5124de27 2556
577cf284
BH
2557 if(::arg().mustDo("config")) {
2558 cout<<::arg().configstring()<<endl;
2559 exit(0);
2560 }
2561
3ddb9247 2562 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
2563 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
2564
2e3d8a19 2565 ::arg().parse(argc,argv);
c836dc19 2566
2e3d8a19 2567 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 2568
61d74169 2569 if(::arg().asNum("threads")==1)
2570 ::arg().set("pdns-distributes-queries")="no";
2571
2e3d8a19 2572 if(::arg().mustDo("help")) {
ff5ba4f9
WA
2573 cout<<"syntax:"<<endl<<endl;
2574 cout<<::arg().helpstring(::arg()["help"])<<endl;
2575 exit(0);
b636533b 2576 }
5e3de507 2577 if(::arg().mustDo("version")) {
ba1a571d 2578 showProductVersion();
3613a51c 2579 showBuildConfiguration();
5e3de507
BH
2580 exit(99);
2581 }
b636533b 2582
34162f8f 2583 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 2584
34162f8f
CH
2585 if (logUrgency < Logger::Error)
2586 logUrgency = Logger::Error;
f48d7b65 2587 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
2588 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
2589 }
34162f8f
CH
2590 L.setLoglevel(logUrgency);
2591 L.toConsole(logUrgency);
2592
f7c1d4e3 2593 serviceMain(argc, argv);
288f4aa9 2594 }
3f81d239 2595 catch(PDNSException &ae) {
c836dc19 2596 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 2597 ret=EXIT_FAILURE;
288f4aa9 2598 }
fdbf35ac 2599 catch(std::exception &e) {
c836dc19 2600 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 2601 ret=EXIT_FAILURE;
288f4aa9
BH
2602 }
2603 catch(...) {
c836dc19 2604 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 2605 ret=EXIT_FAILURE;
288f4aa9 2606 }
3ddb9247 2607
22030c37 2608 return ret;
288f4aa9 2609}