]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
make sure we have enough space to receive those huge dnssec laden packets!
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
183eb877 3 Copyright (C) 2003 - 2015 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
3ddb9247 6 it under the terms of the GNU General Public License version 2
f28307ad 7 as published by the Free Software Foundation
288f4aa9 8
f782fe38
MH
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
288f4aa9
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
288f4aa9 21*/
caa6eefa 22
870a0fe4
AT
23#ifdef HAVE_CONFIG_H
24#include "config.h"
25#endif
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
5a38281c 29#include <boost/foreach.hpp>
2470b36e 30#include "ws-recursor.hh"
49a699c4 31#include <pthread.h>
3ea54bf0 32#include "recpacketcache.hh"
3ddb9247 33#include "utility.hh"
51e2144e 34#include "dns_random.hh"
288f4aa9
BH
35#include <iostream>
36#include <errno.h>
81859ba5 37#include <boost/static_assert.hpp>
288f4aa9
BH
38#include <map>
39#include <set>
97bb160b 40#include "recursor_cache.hh"
38c9ceaa 41#include "cachecleaner.hh"
288f4aa9 42#include <stdio.h>
c75a6a9e 43#include <signal.h>
288f4aa9 44#include <stdlib.h>
bb4bdbaf 45#include "misc.hh"
288f4aa9
BH
46#include "mtasker.hh"
47#include <utility>
288f4aa9
BH
48#include "arguments.hh"
49#include "syncres.hh"
88def049
BH
50#include <fcntl.h>
51#include <fstream>
5c633640
BH
52#include "sstuff.hh"
53#include <boost/tuple/tuple.hpp>
54#include <boost/tuple/tuple_comparison.hpp>
72df400f 55#include <boost/shared_array.hpp>
ea634573 56#include <boost/lexical_cast.hpp>
7f1fa77d 57#include <boost/function.hpp>
5605c067 58#include <boost/algorithm/string.hpp>
8f7473d7 59#ifdef MALLOC_TRACE
60#include "malloctrace.hh"
61#endif
40a3dd64 62#include <netinet/tcp.h>
ea634573
BH
63#include "dnsparser.hh"
64#include "dnswriter.hh"
65#include "dnsrecords.hh"
f814d7c8 66#include "zoneparser-tng.hh"
1d5b3ce6 67#include "rec_channel.hh"
aaacf7f2 68#include "logger.hh"
c8ddb7c2 69#include "iputils.hh"
09e6702a 70#include "mplexer.hh"
c038218b 71#include "config.h"
5704e107 72#include "lua-recursor.hh"
ba1a571d 73#include "version.hh"
79332bff 74#include "responsestats.hh"
d67620e4 75#include "secpoll-recursor.hh"
c5c066bf 76#include "dnsname.hh"
644dd1da 77#include "filterpo.hh"
78#include "rpzloader.hh"
a2bfc3ff
BH
79#ifndef RECURSOR
80#include "statbag.hh"
81StatBag S;
82#endif
83
bb4bdbaf 84__thread FDMultiplexer* t_fdm;
674cf0f6 85__thread unsigned int t_id;
09e6702a 86unsigned int g_maxTCPPerClient;
5b0ddd18 87unsigned int g_networkTimeoutMsec;
08f3f638 88uint64_t g_latencyStatSize;
09e6702a 89bool g_logCommonErrors;
e661a20b 90bool g_anyToTcp;
b33c2462 91uint16_t g_udpTruncationThreshold, g_outgoingEDNSBufsize;
5704e107 92__thread shared_ptr<RecursorLua>* t_pdl;
60c8afa8 93
94__thread addrringbuf_t* t_remotes, *t_servfailremotes, *t_largeanswerremotes;
95
c5c066bf 96__thread boost::circular_buffer<pair<DNSName, uint16_t> >* t_queryring, *t_servfailqueryring;
77499b05 97__thread shared_ptr<Regex>* t_traceRegex;
674cf0f6 98
376effcf 99NetmaskGroup g_ednssubnets;
100SuffixMatchNode g_ednsdomains;
101
644dd1da 102DNSFilterEngine g_dfe;
103
d7dae798
BH
104RecursorControlChannel s_rcc; // only active in thread 0
105
// For communicating with our threads: one set of four descriptors per thread.
// Judging by the member names, each set holds both ends of a pipe leading to
// the thread and both ends of a pipe leading back from it.
struct ThreadPipeSet
{
  int writeToThread;   // write side, direction: towards the thread
  int readToThread;    // read side, direction: towards the thread
  int writeFromThread; // write side, direction: back from the thread
  int readFromThread;  // read side, direction: back from the thread
};
3ea54bf0 114
d7dae798 115vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
5c633640 116
d7dae798 117SyncRes::domainmap_t* g_initialDomainMap; // new threads needs this to be setup
49a699c4
BH
118
119#include "namespaces.hh"
3ea54bf0 120
49a699c4 121__thread MemRecursorCache* t_RC;
16beeaa4 122__thread RecursorPacketCache* t_packetCache;
1d5b3ce6
BH
123RecursorStats g_stats;
124bool g_quiet;
49a699c4 125
1bc3c142
BH
126bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
127
41942bb3 128__thread NetmaskGroup* t_allowFrom;
49a699c4
BH
129static NetmaskGroup* g_initialAllowFrom; // new thread needs to be setup with this
130
eb5bae86 131NetmaskGroup* g_dontQuery;
2d733c0f 132string s_programname="pdns_recursor";
49a699c4 133
40a3dd64
BH
134typedef vector<int> tcpListenSockets_t;
135tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
3159c9ef 136int g_tcpTimeout;
85c32340 137unsigned int g_maxMThreads;
183eb877 138__thread struct timeval g_now; // timestamp, updated (too) frequently
84433b79 139typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
140listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
18af64a8 141
d7dae798
BH
142__thread MT_t* MT; // the big MTasker
143
f8f243b0 144unsigned int g_numThreads, g_numWorkerThreads;
c3828c03 145
12cd44ee 146#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
147// Bad Nets taken from both:
3ddb9247 148// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 149// and
150// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
151// where such a network may not be considered a valid destination
152#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
153#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 154
d7dae798 155//! used to send information to a newborn mthread
ea634573 156struct DNSComboWriter {
3ddb9247 157 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now),
232f0877 158 d_tcp(false), d_socket(-1)
ea634573
BH
159 {}
160 MOADNSParser d_mdp;
00c9b8c1 161 void setRemote(const ComboAddress* sa)
ea634573 162 {
37d3f960 163 d_remote=*sa;
ea634573
BH
164 }
165
b71b60ee 166 void setLocal(const ComboAddress& sa)
167 {
168 d_local=sa;
169 }
170
171
ea634573
BH
172 void setSocket(int sock)
173 {
174 d_socket=sock;
175 }
a1754c6a
BH
176
177 string getRemote() const
178 {
37d3f960 179 return d_remote.toString();
a1754c6a
BH
180 }
181
c9e9e5e0 182 struct timeval d_now;
b71b60ee 183 ComboAddress d_remote, d_local;
ea634573
BH
184 bool d_tcp;
185 int d_socket;
cd989c87 186 shared_ptr<TCPConnection> d_tcpConnection;
ea634573
BH
187};
188
189
288f4aa9
BH
190ArgvMap &arg()
191{
192 static ArgvMap theArg;
193 return theArg;
194}
4ef015cd 195
09e6702a 196
d8f6d49f 197void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 198
50c81227 199// -1 is error, 0 is timeout, 1 is success
3ddb9247 200int asendtcp(const string& data, Socket* sock)
5c633640
BH
201{
202 PacketID pident;
203 pident.sock=sock;
204 pident.outMSG=data;
3ddb9247 205
bb4bdbaf 206 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 207 string packet;
5c633640 208
5b0ddd18 209 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 210
9170fbaf 211 if(!ret || ret==-1) { // timeout
bb4bdbaf 212 t_fdm->removeWriteFD(sock->getHandle());
5c633640 213 }
50c81227
BH
214 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
215 return -1;
216 }
9170fbaf 217 return ret;
5c633640
BH
218}
219
d8f6d49f 220void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 221
9170fbaf 222// -1 is error, 0 is timeout, 1 is success
825fa717 223int arecvtcp(string& data, int len, Socket* sock, bool incompleteOkay)
288f4aa9 224{
50c81227 225 data.clear();
5c633640
BH
226 PacketID pident;
227 pident.sock=sock;
228 pident.inNeeded=len;
825fa717 229 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 230 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 231
bb4bdbaf 232 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 233 if(!ret || ret==-1) { // timeout
bb4bdbaf 234 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 235 }
50c81227
BH
236 else if(data.empty()) {// error, EOF or other
237 return -1;
238 }
239
9170fbaf 240 return ret;
288f4aa9
BH
241}
242
fba1e944 243void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 244{
fba1e944 245 PacketID pident=*any_cast<PacketID>(&var);
4465e941 246 char resp[512];
247 int ret=recv(fd, resp, sizeof(resp), 0);
248 t_fdm->removeReadFD(fd);
249 if(ret >= 0) {
250 string data(resp, ret);
fba1e944 251 MT->sendEvent(pident, &data);
4465e941 252 }
253 else {
fba1e944 254 string empty;
255 MT->sendEvent(pident, &empty);
256 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 257 }
258}
fba1e944 259string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 260{
4465e941 261 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
262 s.setNonBlocking();
263 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
264
265 s.bind(local);
266 s.connect(dest);
4465e941 267 s.send(query);
268
269 PacketID pident;
270 pident.sock=&s;
271 pident.type=0;
fba1e944 272 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 273
274 string data;
fba1e944 275
4465e941 276 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 277
4465e941 278 if(!ret || ret==-1) { // timeout
4465e941 279 t_fdm->removeReadFD(s.getHandle());
280 }
281 else if(data.empty()) {// error, EOF or other
fba1e944 282 // we could special case this
4465e941 283 return data;
284 }
4465e941 285 return data;
286}
287
288
3ddb9247 289vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
046c5a5d 290const ComboAddress g_local4("0.0.0.0"), g_local6("::");
1652a63e 291
d7dae798 292//! pick a random query local address
1652a63e 293ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 294{
1652a63e 295 ComboAddress ret;
5a38281c 296 if(family==AF_INET) {
3ddb9247 297 if(g_localQueryAddresses4.empty())
1652a63e 298 ret = g_local4;
3ddb9247 299 else
1652a63e
BH
300 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
301 ret.sin4.sin_port = htons(port);
5a38281c
BH
302 }
303 else {
304 if(g_localQueryAddresses6.empty())
1652a63e
BH
305 ret = g_local6;
306 else
307 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 308
1652a63e 309 ret.sin6.sin6_port = htons(port);
5a38281c 310 }
1652a63e 311 return ret;
5a38281c 312}
4ef015cd 313
d8f6d49f 314void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 315
d7dae798
BH
316void setSocketBuffer(int fd, int optname, uint32_t size)
317{
318 uint32_t psize=0;
319 socklen_t len=sizeof(psize);
3ddb9247 320
d7dae798
BH
321 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
322 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 323 return;
d7dae798
BH
324 }
325
326 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
c057bfaa 327 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
328}
329
330
331static void setSocketReceiveBuffer(int fd, uint32_t size)
332{
333 setSocketBuffer(fd, SO_RCVBUF, size);
334}
335
336static void setSocketSendBuffer(int fd, uint32_t size)
337{
338 setSocketBuffer(fd, SO_SNDBUF, size);
339}
340
341
4ef015cd
BH
342// you can ask this class for a UDP socket to send a query from
343// this socket is not yours, don't even think about deleting it
344// but after you call 'returnSocket' on it, don't assume anything anymore
345class UDPClientSocks
346{
4ef015cd 347 unsigned int d_numsocks;
4ef015cd 348public:
e2642526 349 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
350 {
351 }
352
996c89cc 353 typedef set<int> socks_t;
4ef015cd
BH
354 socks_t d_socks;
355
d8f6d49f
BH
356 // returning -1 means: temporary OS error (ie, out of files), -2 means OS error
357 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 358 {
d8f6d49f
BH
359 *fd=makeClientSocket(toaddr.sin4.sin_family);
360 if(*fd < 0) // temporary error - receive exception otherwise
361 return -1;
362
363 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
364 int err = errno;
41ff43f8 365 // returnSocket(*fd);
3897b9e1 366 closesocket(*fd);
d8f6d49f 367 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 368 return -2;
998a4334 369 return -1;
d8f6d49f 370 }
998a4334 371
d8f6d49f 372 d_socks.insert(*fd);
998a4334 373 d_numsocks++;
d8f6d49f 374 return 0;
4ef015cd
BH
375 }
376
095c3045
BH
377 void returnSocket(int fd)
378 {
379 socks_t::iterator i=d_socks.find(fd);
34801ab1 380 if(i==d_socks.end()) {
3f81d239 381 throw PDNSException("Trying to return a socket (fd="+lexical_cast<string>(fd)+") not in the pool");
34801ab1 382 }
bb4bdbaf 383 returnSocketLocked(i);
095c3045
BH
384 }
385
4ef015cd 386 // return a socket to the pool, or simply erase it
bb4bdbaf 387 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 388 {
600fc20b 389 if(i==d_socks.end()) {
3f81d239 390 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 391 }
80baf329 392 try {
bb4bdbaf 393 t_fdm->removeReadFD(*i);
80baf329
BH
394 }
395 catch(FDMultiplexerException& e) {
bb4bdbaf 396 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 397 }
3897b9e1 398 closesocket(*i);
3ddb9247 399
998a4334
BH
400 d_socks.erase(i++);
401 --d_numsocks;
4ef015cd 402 }
d8f6d49f
BH
403
404 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 405 static int makeClientSocket(int family)
d8f6d49f 406 {
a903b39c 407 int ret=(int)socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 408
d8f6d49f
BH
409 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
410 return ret;
3ddb9247
PD
411
412 if(ret<0)
3f81d239 413 throw PDNSException("Making a socket for resolver (family = "+lexical_cast<string>(family)+"): "+stringerror());
36855b53 414
3897b9e1 415 setCloseOnExec(ret);
5a38281c 416
d8f6d49f 417 int tries=10;
3aa91c3e 418 ComboAddress sin;
d8f6d49f 419 while(--tries) {
1652a63e 420 uint16_t port;
3ddb9247 421
d8f6d49f 422 if(tries==1) // fall back to kernel 'random'
4957a608 423 port = 0;
1652a63e
BH
424 else
425 port = 1025 + dns_random(64510);
5a38281c 426
3aa91c3e 427 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 428
3ddb9247 429 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 430 break;
d8f6d49f
BH
431 }
432 if(!tries)
3aa91c3e 433 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 434
3897b9e1 435 setNonBlocking(ret);
d8f6d49f
BH
436 return ret;
437 }
49a699c4
BH
438};
439
440static __thread UDPClientSocks* t_udpclientsocks;
4ef015cd 441
288f4aa9 442/* these two functions are used by LWRes */
34801ab1 443// -2 is OS error, -1 is error that depends on the remote, > 0 is success
3ddb9247
PD
444int asendto(const char *data, int len, int flags,
445 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 446{
34801ab1
BH
447
448 PacketID pident;
787e5eab
BH
449 pident.domain = domain;
450 pident.remote = toaddr;
451 pident.type = qtype;
34801ab1
BH
452
453 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
454 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
455
456 for(; chain.first != chain.second; chain.first++) {
457 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 458 /*
4665c31e
BH
459 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
460 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 461 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 462 */
34801ab1
BH
463 chain.first->key.chain.insert(id); // we can chain
464 *fd=-1; // gets used in waitEvent / sendEvent later on
465 return 1;
466 }
467 }
468
49a699c4 469 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
470 if(ret < 0)
471 return ret;
34801ab1 472
998a4334
BH
473 pident.fd=*fd;
474 pident.id=id;
3ddb9247 475
bb4bdbaf
BH
476 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
477 ret = send(*fd, data, len, 0);
478
5b0ddd18 479 int tmp = errno;
bb4bdbaf 480
7302ed0a 481 if(ret < 0)
49a699c4 482 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 483
5b0ddd18 484 errno = tmp; // this is for logging purposes only
7302ed0a 485 return ret;
288f4aa9
BH
486}
487
9170fbaf 488// -1 is error, 0 is timeout, 1 is success
3ddb9247 489int arecvfrom(char *data, int len, int flags, const ComboAddress& fromaddr, int *d_len,
c5c066bf 490 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 491{
0d5f0a9f 492 static optional<unsigned int> nearMissLimit;
3ddb9247 493 if(!nearMissLimit)
0d5f0a9f
BH
494 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
495
288f4aa9 496 PacketID pident;
4ef015cd 497 pident.fd=fd;
288f4aa9 498 pident.id=id;
0d5f0a9f 499 pident.domain=domain;
787e5eab 500 pident.type = qtype;
996c89cc 501 pident.remote=fromaddr;
b636533b 502
288f4aa9 503 string packet;
5b0ddd18 504 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 505
9170fbaf 506 if(ret > 0) {
996c89cc 507 if(packet.empty()) // means "error"
3ddb9247 508 return -1;
998a4334 509
705f31ae 510 *d_len=(int)packet.size();
9170fbaf 511 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 512 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 513 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 514 g_stats.spoofCount++;
35ce8576
BH
515 return -1;
516 }
288f4aa9 517 }
09e6702a 518 else {
34801ab1 519 if(fd >= 0)
49a699c4 520 t_udpclientsocks->returnSocket(fd);
09e6702a 521 }
9170fbaf 522 return ret;
288f4aa9
BH
523}
524
aa4e4cbf 525
87a5ea63 526string s_pidfname;
88def049
BH
527static void writePid(void)
528{
191f2e47 529 if(!::arg().mustDo("write-pid"))
530 return;
18e7758c 531 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 532 if(of)
705f31ae 533 of<< Utility::getpid() <<endl;
88def049 534 else
c057bfaa 535 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
536}
537
bd0289fc
BH
538typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
539tcpClientCounts_t __thread* t_tcpClientCounts;
0e9d9ce2 540
cd989c87 541TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
542{
543 ++s_currentConnections;
cd989c87 544 (*t_tcpClientCounts)[d_remote]++;
0e408828 545}
cd989c87
BH
546
547TCPConnection::~TCPConnection()
0e408828 548{
3ddb9247 549 if(closesocket(d_fd) < 0)
cd989c87 550 unixDie("closing socket for TCPConnection");
3ddb9247 551 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 552 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 553 --s_currentConnections;
0e408828 554}
0e9d9ce2 555
3ddb9247 556AtomicCounter TCPConnection::s_currentConnections;
d8f6d49f 557void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 558
92011b8f 559// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
c5c066bf 560void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 561{
92011b8f 562 if(packetsize > 1000 && t_largeanswerremotes)
563 t_largeanswerremotes->push_back(remote);
2cc13433
BH
564 switch(res) {
565 case RCode::ServFail:
92011b8f 566 if(t_servfailremotes) {
567 t_servfailremotes->push_back(remote);
568 if(query) // packet cache
569 t_servfailqueryring->push_back(make_pair(*query, qtype));
570 }
2cc13433
BH
571 g_stats.servFails++;
572 break;
573 case RCode::NXDomain:
574 g_stats.nxDomains++;
575 break;
576 case RCode::NoError:
577 g_stats.noErrors++;
578 break;
579 }
580}
581
a903b39c 582static string makeLoginfo(DNSComboWriter* dc)
583try
584{
c5c066bf 585 return "("+dc->d_mdp.d_qname.toString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
a903b39c 586}
587catch(...)
588{
589 return "Exception making error message for exception";
590}
591
288f4aa9
BH
592void startDoResolve(void *p)
593{
7b1469bb 594 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 595 try {
92011b8f 596 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
597
b18ace73 598 uint32_t maxanswersize= dc->d_tcp ? 65535 : min((uint16_t) 512, g_udpTruncationThreshold);
7f7b8d55 599 EDNSOpts edo;
56b4d21b 600 if(getEDNSOpts(dc->d_mdp, &edo) && !dc->d_tcp) {
b18ace73 601 maxanswersize = min(edo.d_packetsize, g_udpTruncationThreshold);
10321a98 602 }
3ddb9247 603 ComboAddress local;
84433b79 604 listenSocketsAddresses_t::const_iterator lociter;
e325f20c 605 vector<DNSRecord> ret;
ea634573 606 vector<uint8_t> packet;
b23b8614 607
3ddb9247 608 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
609
610 pw.getHeader()->aa=0;
611 pw.getHeader()->ra=1;
c154c8a4 612 pw.getHeader()->qr=1;
bb4bdbaf 613 pw.getHeader()->tc=0;
ea634573 614 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 615 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 616 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 617
904d3219
PD
618 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
619
620 SyncRes sr(dc->d_now);
3457a2a0 621 if(t_pdl) {
622 sr.setLuaEngine(*t_pdl);
4ea94941 623 sr.d_requestor=dc->d_remote;
3457a2a0 624 }
57769f13 625
626 if(pw.getHeader()->cd || edo.d_Z & EDNSOpts::DNSSECOK)
627 sr.d_doDNSSEC=true;
628
904d3219
PD
629 bool tracedQuery=false; // we could consider letting Lua know about this too
630 bool variableAnswer = false;
631
56b4d21b 632 int res;
39ec5d29 633 DNSFilterEngine::Policy dfepol;
634 DNSRecord spoofed;
e661a20b 635 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
636 pw.getHeader()->tc = 1;
637 res = 0;
638 variableAnswer = true;
e661a20b
PD
639 goto sendit;
640 }
641
c5c066bf 642 if(t_traceRegex->get() && (*t_traceRegex)->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
643 sr.setLogMode(SyncRes::Store);
644 tracedQuery=true;
645 }
3ddb9247 646
8f7473d7 647
77499b05 648 if(!g_quiet || tracedQuery)
461df9d2 649 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
8a63d3ce 650 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
c75a6a9e 651
fededf47 652 sr.setId(MT->getTid());
67828389 653 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
654 sr.setCacheOnly();
655
84433b79 656 local.sin4.sin_family = dc->d_remote.sin4.sin_family;
657
658 lociter = g_listenSocketsAddresses.find(dc->d_socket);
659 if(lociter != g_listenSocketsAddresses.end()) {
660 local = lociter->second;
661 }
662 else {
663 socklen_t len = local.getSocklen();
664 getsockname(dc->d_socket, (sockaddr*)&local, &len); // if this fails, we're ok with it
665 }
666
3ddb9247 667 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
e325f20c 668
39ec5d29 669 dfepol = g_dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote);
670
671 switch(dfepol.d_kind) {
672 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 673 break;
39ec5d29 674 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 675 g_stats.policyDrops++;
676 delete dc;
677 dc=0;
678 return;
39ec5d29 679 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 680 res=RCode::NXDomain;
681 goto haveAnswer;
682
39ec5d29 683 case DNSFilterEngine::PolicyKind::NODATA:
684 res=RCode::NoError;
685 goto haveAnswer;
686
687 case DNSFilterEngine::PolicyKind::Custom:
644dd1da 688 res=RCode::NoError;
39ec5d29 689 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 690 spoofed.d_type=dfepol.d_custom->getType();
39ec5d29 691 spoofed.d_ttl = 1234;
692 spoofed.d_class = 1;
693 spoofed.d_content = dfepol.d_custom;
589ad24b 694 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 695 ret.push_back(spoofed);
644dd1da 696 goto haveAnswer;
697
39ec5d29 698
699 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 700 if(!dc->d_tcp) {
701 res=RCode::NoError;
702 pw.getHeader()->tc=1;
703 goto haveAnswer;
704 }
705 break;
706 }
707
84433b79 708 if(!t_pdl->get() || !(*t_pdl)->preresolve(dc->d_remote, local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer)) {
44971ca0
PD
709 try {
710 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
711 }
712 catch(ImmediateServFailException &e) {
854d44e3 713 if(g_logCommonErrors)
714 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
715 res = RCode::ServFail;
716 }
4485aa35 717
39ec5d29 718 dfepol = g_dfe.getPostPolicy(ret);
719 switch(dfepol.d_kind) {
720 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 721 break;
39ec5d29 722 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 723 g_stats.policyDrops++;
724 delete dc;
725 dc=0;
726 return;
39ec5d29 727 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 728 ret.clear();
729 res=RCode::NXDomain;
730 goto haveAnswer;
731
39ec5d29 732 case DNSFilterEngine::PolicyKind::NODATA:
644dd1da 733 ret.clear();
734 res=RCode::NoError;
735 goto haveAnswer;
736
39ec5d29 737 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 738 if(!dc->d_tcp) {
739 ret.clear();
740 res=RCode::NoError;
741 pw.getHeader()->tc=1;
742 goto haveAnswer;
743 }
744 break;
39ec5d29 745
746 case DNSFilterEngine::PolicyKind::Custom:
747 res=RCode::NoError;
748 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 749 spoofed.d_type=dfepol.d_custom->getType();
39ec5d29 750 spoofed.d_ttl = 1234;
751 spoofed.d_class = 1;
752 spoofed.d_content = dfepol.d_custom;
589ad24b 753 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 754 ret.push_back(spoofed);
755 goto haveAnswer;
644dd1da 756 }
757
674cf0f6 758 if(t_pdl->get()) {
bd53ea9d 759 if(res == RCode::NoError) {
e325f20c 760 auto i=ret.cbegin();
761 for(; i!= ret.cend(); ++i)
e693ff5a 762 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
232f0877 763 break;
e325f20c 764 if(i == ret.cend())
84433b79 765 (*t_pdl)->nodata(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
232f0877
CH
766 }
767 else if(res == RCode::NXDomain)
644dd1da 768 (*t_pdl)->nxdomain(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
769
770
771 (*t_pdl)->postresolve(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
d2322a5e 772 }
4485aa35 773 }
644dd1da 774 haveAnswer:;
3e8216c8 775 if(res == PolicyDecision::DROP) {
e9c2ad3a 776 g_stats.policyDrops++;
ae7e77ad 777 delete dc;
778 dc=0;
779 return;
3ddb9247 780 }
3e8216c8 781 if(tracedQuery || res == PolicyDecision::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 782 {
85ffbc53
PD
783 string trace(sr.getTrace());
784 if(!trace.empty()) {
785 vector<string> lines;
786 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 787 for(const string& line : lines) {
85ffbc53
PD
788 if(!line.empty())
789 L<<Logger::Warning<< line << endl;
790 }
791 }
792 }
3ddb9247 793
3e8216c8 794 if(res == PolicyDecision::PASS) {
0fe1d080
PD
795 pw.getHeader()->rcode=RCode::ServFail;
796 // no commit here, because no record
797 g_stats.servFails++;
798 }
288f4aa9 799 else {
ea634573 800 pw.getHeader()->rcode=res;
92011b8f 801
c154c8a4 802 if(ret.size()) {
92476c8b 803 orderAndShuffle(ret);
e325f20c 804 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
e693ff5a 805 pw.startRecord(i->d_name, i->d_type, i->d_ttl, i->d_class, i->d_place);
e325f20c 806 minTTL = min(minTTL, i->d_ttl);
807 i->d_content->toPacket(pw);
dffbaa08 808 if(pw.size() > maxanswersize) {
4957a608 809 pw.rollback();
e693ff5a 810 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
add935a2 811 {
4957a608 812 pw.getHeader()->tc=1;
add935a2
PD
813 pw.truncate();
814 }
4957a608
BH
815 goto sendit; // need to jump over pw.commit
816 }
817 }
b23b8614 818
18af64a8 819 pw.commit();
ea634573 820 }
288f4aa9 821 }
10321a98 822 sendit:;
79332bff 823 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
92011b8f 824 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ea634573 825 if(!dc->d_tcp) {
b71b60ee 826 struct msghdr msgh;
827 struct iovec iov;
828 char cbuf[256];
829 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
830 if(dc->d_local.sin4.sin_family)
831 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local);
579cae19
PD
832 else
833 msgh.msg_control=NULL;
b71b60ee 834 sendmsg(dc->d_socket, &msgh, 0);
3762e821 835 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
79332bff 836 t_packetCache->insertResponsePacket(string((const char*)&*packet.begin(), packet.size()),
3ddb9247
PD
837 g_now.tv_sec,
838 min(minTTL,
79332bff 839 (pw.getHeader()->rcode == RCode::ServFail) ? SyncRes::s_packetcacheservfailttl : SyncRes::s_packetcachettl
3ddb9247 840 )
79332bff 841 );
1051f8a9 842 }
3762e821 843 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 844 }
9c495589
BH
845 else {
846 char buf[2];
ea634573
BH
847 buf[0]=packet.size()/256;
848 buf[1]=packet.size()%256;
feccc9fc 849
c038218b 850 Utility::iovec iov[2];
feccc9fc 851
ea634573
BH
852 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
853 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 854
c038218b 855 int ret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 856 bool hadError=true;
feccc9fc 857
3ddb9247 858 if(ret == 0)
18af64a8 859 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
3ddb9247 860 else if(ret < 0 )
18af64a8 861 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
ea634573 862 else if((unsigned int)ret != 2 + packet.size())
18af64a8 863 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
0e9d9ce2 864 else
18af64a8 865 hadError=false;
3ddb9247 866
09e6702a 867 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 868
09e6702a 869 if(hadError) {
18af64a8 870 // no need to remove us from FDM, we weren't there
c36bc97a 871 dc->d_socket = -1;
09e6702a 872 }
a6ae6414 873 else {
cd989c87 874 dc->d_tcpConnection->state=TCPConnection::BYTE0;
18af64a8 875 Utility::gettimeofday(&g_now, 0); // needs to be updated
cd989c87
BH
876 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
877 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
0e9d9ce2 878 }
9c495589 879 }
3ddb9247 880
1d5b3ce6 881 if(!g_quiet) {
461df9d2 882 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 883 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
9de3e034 884 sr.d_totUsec/1000.0<<" ms, "<<
885 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e 886 }
b23b8614 887
3ddb9247 888 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
fe213470
BH
889 float spent=makeFloat(sr.d_now-dc->d_now);
890 if(spent < 0.001)
891 g_stats.answers0_1++;
892 else if(spent < 0.010)
893 g_stats.answers1_10++;
894 else if(spent < 0.1)
895 g_stats.answers10_100++;
896 else if(spent < 1.0)
897 g_stats.answers100_1000++;
898 else
899 g_stats.answersSlow++;
900
574af7ea 901 uint64_t newLat=(uint64_t)(spent*1000000);
08f3f638 902 newLat = min(newLat,(uint64_t)(g_networkTimeoutMsec*1000)); // outliers of several minutes exist..
903 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 904 // no worries, we do this for packet cache hits elsewhere
c6d04bdc 905 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 906 delete dc;
c36bc97a 907 dc=0;
288f4aa9 908 }
3f81d239 909 catch(PDNSException &ae) {
a903b39c 910 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 911 delete dc;
288f4aa9 912 }
7b1469bb 913 catch(MOADNSException& e) {
a903b39c 914 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 915 delete dc;
7b1469bb 916 }
fdbf35ac 917 catch(std::exception& e) {
a903b39c 918 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what()<<endl;
c36bc97a 919 delete dc;
c154c8a4 920 }
288f4aa9 921 catch(...) {
a903b39c 922 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 923 }
3ddb9247 924
ec6eacbc 925 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
926}
927
677e2a46 928void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 929{
2d733c0f 930 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46
BH
931 if(processNum >= 0)
932 sockname += "."+lexical_cast<string>(processNum);
933 sockname+=".controlsocket";
41f7a068 934 s_rcc.listen(sockname);
3ddb9247 935
387de317
BH
936 int sockowner = -1;
937 int sockgroup = -1;
938
939 if (!::arg().isEmpty("socket-group"))
940 sockgroup=::arg().asGid("socket-group");
941 if (!::arg().isEmpty("socket-owner"))
942 sockowner=::arg().asUid("socket-owner");
3ddb9247 943
f838ad8d
BH
944 if (sockgroup > -1 || sockowner > -1) {
945 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
946 unixDie("Failed to chown control socket");
947 }
948 }
387de317
BH
949
950 // do mode change if socket-mode is given
951 if(!::arg().isEmpty("socket-mode")) {
952 mode_t sockmode=::arg().asMode("socket-mode");
953 chmod(sockname.c_str(), sockmode);
954 }
1d5b3ce6
BH
955}
956
d8f6d49f 957void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 958{
cd989c87 959 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 960
879b3f70 961 if(conn->state==TCPConnection::BYTE0) {
cd989c87 962 int bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 963 if(bytes==1)
667f7e60 964 conn->state=TCPConnection::BYTE1;
3ddb9247 965 if(bytes==2) {
a0aa4f64 966 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
967 conn->bytesread=0;
968 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
969 }
970 if(!bytes || bytes < 0) {
bb4bdbaf 971 t_fdm->removeReadFD(fd);
09e6702a
BH
972 return;
973 }
974 }
667f7e60 975 else if(conn->state==TCPConnection::BYTE1) {
cd989c87 976 int bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 977 if(bytes==1) {
667f7e60 978 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 979 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 980 conn->bytesread=0;
09e6702a
BH
981 }
982 if(!bytes || bytes < 0) {
983 if(g_logCommonErrors)
cd989c87 984 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 985 t_fdm->removeReadFD(fd);
09e6702a
BH
986 return;
987 }
988 }
667f7e60 989 else if(conn->state==TCPConnection::GETQUESTION) {
cd989c87 990 int bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
09e6702a 991 if(!bytes || bytes < 0) {
cd989c87 992 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 993 t_fdm->removeReadFD(fd);
09e6702a
BH
994 return;
995 }
667f7e60
BH
996 conn->bytesread+=bytes;
997 if(conn->bytesread==conn->qlen) {
bb4bdbaf 998 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 999
09e6702a
BH
1000 DNSComboWriter* dc=0;
1001 try {
cd989c87 1002 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
1003 }
1004 catch(MOADNSException &mde) {
3ddb9247 1005 g_stats.clientParseError++;
4957a608 1006 if(g_logCommonErrors)
cd989c87 1007 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 1008 return;
09e6702a 1009 }
cd989c87
BH
1010 dc->d_tcpConnection = conn; // carry the torch
1011 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1012 dc->d_tcp=true;
cd989c87 1013 dc->setRemote(&conn->d_remote);
879b3f70 1014 if(dc->d_mdp.d_header.qr) {
4957a608 1015 delete dc;
4328f463 1016 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
4957a608 1017 return;
879b3f70 1018 }
3abcdab2
PD
1019 if(dc->d_mdp.d_header.opcode) {
1020 delete dc;
4328f463 1021 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
3abcdab2
PD
1022 return;
1023 }
09e6702a 1024 else {
4957a608
BH
1025 ++g_stats.qcounter;
1026 ++g_stats.tcpqcounter;
50a5ef72 1027 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1028 return;
09e6702a
BH
1029 }
1030 }
1031 }
1032}
1033
6dcd28c3 1034//! Handle new incoming TCP connection
d8f6d49f 1035void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1036{
37d3f960 1037 ComboAddress addr;
09e6702a 1038 socklen_t addrlen=sizeof(addr);
705f31ae 1039 int newsock=(int)accept(fd, (struct sockaddr*)&addr, &addrlen);
09e6702a 1040 if(newsock>0) {
85c32340
BH
1041 if(MT->numProcesses() > g_maxMThreads) {
1042 g_stats.overCapacityDrops++;
3897b9e1 1043 closesocket(newsock);
85c32340
BH
1044 return;
1045 }
1046
92011b8f 1047 if(t_remotes)
1048 t_remotes->push_back(addr);
49a699c4 1049 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1050 if(!g_quiet)
4957a608 1051 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1052
09e6702a 1053 g_stats.unauthorizedTCP++;
3897b9e1 1054 closesocket(newsock);
09e6702a
BH
1055 return;
1056 }
bd0289fc 1057 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1058 g_stats.tcpClientOverflow++;
3897b9e1 1059 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
09e6702a
BH
1060 return;
1061 }
3ddb9247 1062
3897b9e1 1063 setNonBlocking(newsock);
cd989c87
BH
1064 shared_ptr<TCPConnection> tc(new TCPConnection(newsock, addr));
1065 tc->state=TCPConnection::BYTE0;
3ddb9247 1066
cd989c87 1067 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1068
0bff046b 1069 struct timeval now;
c038218b 1070 Utility::gettimeofday(&now, 0);
cd989c87 1071 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1072 }
1073}
3ddb9247 1074
b71b60ee 1075string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1076{
183eb877 1077 gettimeofday(&g_now, 0);
b71b60ee 1078 struct timeval diff = g_now - tv;
1079 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1080
22cf1fda 1081 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1082 g_stats.tooOldDrops++;
1083 return 0;
1084 }
1085
1bc3c142 1086 ++g_stats.qcounter;
d7f10541
BH
1087 if(fromaddr.sin4.sin_family==AF_INET6)
1088 g_stats.ipv6qcounter++;
1bc3c142
BH
1089
1090 string response;
1091 try {
1092 uint32_t age;
8f7473d7 1093#ifdef MALLOC_TRACE
1094 /*
1095 static uint64_t last=0;
1096 if(!last)
1097 g_mtracer->clearAllocators();
1098 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1099 last=g_mtracer->getAllocs();
1100 cout<<g_mtracer->topAllocatorsString()<<endl;
1101 g_mtracer->clearAllocators();
1102 */
1103#endif
1104
1bc3c142
BH
1105 if(!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(question, g_now.tv_sec, &response, &age)) {
1106 if(!g_quiet)
d738f00f 1107 L<<Logger::Notice<<t_id<< " question answered from packet cache from "<<fromaddr.toString()<<endl;
92011b8f 1108 // t_queryring->push_back("packetcached");
3ddb9247 1109
8f7473d7 1110
1111
1bc3c142
BH
1112 g_stats.packetCacheHits++;
1113 SyncRes::s_queries++;
1114 ageDNSPacket(response, age);
b71b60ee 1115 struct msghdr msgh;
1116 struct iovec iov;
1117 char cbuf[256];
1118 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1119 if(destaddr.sin4.sin_family) {
b71b60ee 1120 addCMsgSrcAddr(&msgh, cbuf, &destaddr);
1121 }
579cae19
PD
1122 else {
1123 msgh.msg_control=NULL;
1124 }
b71b60ee 1125 sendmsg(fd, &msgh, 0);
1126
97bee66d
BH
1127 if(response.length() >= sizeof(struct dnsheader)) {
1128 struct dnsheader dh;
1129 memcpy(&dh, response.c_str(), sizeof(dh));
92011b8f 1130 updateResponseStats(dh.rcode, fromaddr, response.length(), 0, 0);
97bee66d 1131 }
08f3f638 1132 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1bc3c142
BH
1133 return 0;
1134 }
3ddb9247 1135 }
1bc3c142
BH
1136 catch(std::exception& e) {
1137 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1138 return 0;
1139 }
3ddb9247 1140
4ea94941 1141 if(t_pdl->get()) {
1142 if((*t_pdl)->ipfilter(fromaddr, destaddr)) {
1143 if(!g_quiet)
1144 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1145 g_stats.policyDrops++;
1146 return 0;
1147 }
1148 }
1149
1bc3c142 1150 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 1151 if(!g_quiet)
854d44e3 1152 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
461df9d2 1153
1bc3c142
BH
1154 g_stats.overCapacityDrops++;
1155 return 0;
1156 }
3ddb9247 1157
1bc3c142
BH
1158 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1159 dc->setSocket(fd);
1160 dc->setRemote(&fromaddr);
b71b60ee 1161 dc->setLocal(destaddr);
1bc3c142
BH
1162
1163 dc->d_tcp=false;
1164 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1165 return 0;
3ddb9247
PD
1166}
1167
b71b60ee 1168
d8f6d49f 1169void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 1170{
a9af3782 1171 int len;
5db529f8
BH
1172 char data[1500];
1173 ComboAddress fromaddr;
b71b60ee 1174 struct msghdr msgh;
1175 struct iovec iov;
1176 char cbuf[256];
1177
1178 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1179 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1180
3ddb9247 1181 for(;;)
b71b60ee 1182 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
92011b8f 1183 if(t_remotes)
1184 t_remotes->push_back(fromaddr);
b23b8614 1185
49a699c4 1186 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 1187 if(!g_quiet)
4957a608 1188 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1189
5db529f8 1190 g_stats.unauthorizedUDP++;
a9af3782 1191 return;
5db529f8 1192 }
15c01deb 1193 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 1194 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 1195 if(!g_quiet)
81859ba5 1196 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1197
1198 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1199 return;
1200 }
5db529f8 1201 try {
b23b8614 1202 dnsheader* dh=(dnsheader*)data;
3ddb9247 1203
b23b8614 1204 if(dh->qr) {
4957a608
BH
1205 if(g_logCommonErrors)
1206 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 1207 }
3abcdab2
PD
1208 else if(dh->opcode) {
1209 if(g_logCommonErrors)
1210 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1211 }
5db529f8 1212 else {
232f0877 1213 string question(data, len);
b71b60ee 1214 struct timeval tv={0,0};
1215 HarvestTimestamp(&msgh, &tv);
1216 ComboAddress dest;
1217 memset(&dest, 0, sizeof(dest)); // this makes sure we igore this address if not returned by recvmsg above
1218 HarvestDestinationAddress(&msgh, &dest);
232f0877 1219 if(g_weDistributeQueries)
b71b60ee 1220 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
232f0877 1221 else
b71b60ee 1222 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
5db529f8
BH
1223 }
1224 }
1225 catch(MOADNSException& mde) {
3ddb9247 1226 g_stats.clientParseError++;
84e66a59 1227 if(g_logCommonErrors)
4957a608 1228 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 1229 }
0b602819
KM
1230 catch(std::runtime_error& e) {
1231 g_stats.clientParseError++;
1232 if(g_logCommonErrors)
1233 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
1234 }
1235 }
ac0e821b
BH
1236 else {
1237 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
3ddb9247 1238 if(errno == EAGAIN)
9326cae1 1239 g_stats.noPacketError++;
bf3b0cec 1240 break;
ac0e821b 1241 }
5db529f8
BH
1242}
1243
1bc3c142 1244
5db529f8
BH
1245typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
1246deferredAdd_t deferredAdd;
1247
f28307ad 1248void makeTCPServerSockets()
9c495589 1249{
37d3f960 1250 int fd;
f28307ad 1251 vector<string>locals;
2e3d8a19 1252 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1253
f28307ad 1254 if(locals.empty())
3f81d239 1255 throw PDNSException("No local address specified");
3ddb9247 1256
f28307ad 1257 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1258 ServiceTuple st;
1259 st.port=::arg().asNum("local-port");
1260 parseService(*i, st);
3ddb9247 1261
32252594
BH
1262 ComboAddress sin;
1263
f28307ad 1264 memset((char *)&sin,0, sizeof(sin));
37d3f960 1265 sin.sin4.sin_family = AF_INET;
32252594 1266 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1267 sin.sin6.sin6_family = AF_INET6;
f71bc087 1268 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1269 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1270 }
1271
1272 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 1273 if(fd<0)
3f81d239 1274 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 1275
3897b9e1 1276 setCloseOnExec(fd);
a903b39c 1277
f28307ad 1278 int tmp=1;
37d3f960 1279 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
f28307ad 1280 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1281 exit(1);
f28307ad 1282 }
0dfa94ab 1283 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1284 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1285 }
1286
c8ddb7c2 1287#ifdef TCP_DEFER_ACCEPT
37d3f960
BH
1288 if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
1289 if(i==locals.begin())
4957a608 1290 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1291 }
1292#endif
1293
fec7dd5a
SS
1294 if( ::arg().mustDo("non-local-bind") )
1295 Utility::setBindAny(AF_INET, fd);
1296
32252594 1297 sin.sin4.sin_port = htons(st.port);
37d3f960 1298 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1299 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1300 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 1301
3897b9e1 1302 setNonBlocking(fd);
49a699c4 1303 setSocketSendBuffer(fd, 65000);
37d3f960 1304 listen(fd, 128);
5db529f8 1305 deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1306 g_tcpListenSockets.push_back(fd);
84433b79 1307 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1308 // - fd is not that which we know here, but returned from accept()
3ddb9247 1309 if(sin.sin4.sin_family == AF_INET)
32252594 1310 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1311 else
32252594 1312 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1313 }
9c495589
BH
1314}
1315
f28307ad 1316void makeUDPServerSockets()
288f4aa9 1317{
fec7dd5a 1318 int one=1;
f28307ad 1319 vector<string>locals;
2e3d8a19 1320 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1321
f28307ad 1322 if(locals.empty())
3f81d239 1323 throw PDNSException("No local address specified");
3ddb9247 1324
f28307ad 1325 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1326 ServiceTuple st;
1327 st.port=::arg().asNum("local-port");
1328 parseService(*i, st);
1329
37d3f960 1330 ComboAddress sin;
996c89cc 1331
37d3f960
BH
1332 memset(&sin, 0, sizeof(sin));
1333 sin.sin4.sin_family = AF_INET;
32252594 1334 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1335 sin.sin6.sin6_family = AF_INET6;
f71bc087 1336 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1337 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 1338 }
3ddb9247 1339
bb4bdbaf 1340 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 1341 if(fd < 0) {
3f81d239 1342 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1343 }
915b0c39
AT
1344 if (!setSocketTimestamps(fd))
1345 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 1346
b71b60ee 1347 if(IsAnyAddress(sin)) {
1348 setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems
757d3179 1349#ifdef IPV6_RECVPKTINFO
3ddb9247 1350 setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
757d3179 1351#endif
0dfa94ab 1352 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
1353 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1354 }
b71b60ee 1355 }
37d3f960 1356
fec7dd5a
SS
1357 if( ::arg().mustDo("non-local-bind") )
1358 Utility::setBindAny(AF_INET6, fd);
1359
3897b9e1 1360 setCloseOnExec(fd);
a903b39c 1361
4e9a20e6 1362 setSocketReceiveBuffer(fd, 250000);
32252594 1363 sin.sin4.sin_port = htons(st.port);
37d3f960
BH
1364
1365 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1366 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
3f81d239 1367 throw PDNSException("Resolver binding to server socket on port "+ lexical_cast<string>(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 1368
3897b9e1 1369 setNonBlocking(fd);
c2136bf0 1370
0aaecd50 1371 deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 1372 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 1373 if(sin.sin4.sin_family == AF_INET)
32252594 1374 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1375 else
32252594 1376 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1377 }
c836dc19 1378}
caa6eefa 1379
9c495589 1380
c836dc19
BH
1381void daemonize(void)
1382{
1383 if(fork())
1384 exit(0); // bye bye
3ddb9247
PD
1385
1386 setsid();
c836dc19 1387
27a5ead5 1388 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 1389 if(i < 0)
27a5ead5
BH
1390 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1391 else {
1392 dup2(i,0); /* stdin */
1393 dup2(i,1); /* stderr */
1394 dup2(i,2); /* stderr */
1395 close(i);
1396 }
288f4aa9 1397}
caa6eefa 1398
cc59bce6 1399AtomicCounter counter;
c75a6a9e
BH
1400bool statsWanted;
1401
1402void usr1Handler(int)
1403{
1404 statsWanted=true;
1405}
ae1b2e98 1406
9170fbaf
BH
1407void usr2Handler(int)
1408{
f1f34cc2 1409 g_quiet= !g_quiet;
1410 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
1411 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
1412}
1413
c75a6a9e
BH
1414void doStats(void)
1415{
16beeaa4
BH
1416 static time_t lastOutputTime;
1417 static uint64_t lastQueryCount;
d299d4f5 1418
1419 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
1420 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 1421
d299d4f5 1422 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
3427fa8a
BH
1423 L<<Logger::Warning<<"stats: "<<g_stats.qcounter<<" questions, "<<
1424 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
1425 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
1426 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
1427
3427fa8a
BH
1428 L<<Logger::Warning<<"stats: throttle map: "
1429 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 1430 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
70c2c8b1
BH
1431 L<<Logger::Warning<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1432 L<<Logger::Warning<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 1433 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
3427fa8a
BH
1434 L<<Logger::Warning<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
1435 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 1436
16beeaa4
BH
1437 //L<<Logger::Warning<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
1438 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 1439
16beeaa4
BH
1440 L<<Logger::Warning<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
1441 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 1442
16beeaa4
BH
1443 time_t now = time(0);
1444 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
1445 L<<Logger::Warning<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
1446 }
1447 lastOutputTime = now;
1448 lastQueryCount = SyncRes::s_queries;
c75a6a9e 1449 }
3ddb9247 1450 else if(statsWanted)
70c2c8b1 1451 L<<Logger::Warning<<"stats: no stats yet!"<<endl;
7becf07f 1452
c75a6a9e
BH
1453 statsWanted=false;
1454}
c836dc19 1455
29f0b1ce 1456static void houseKeeping(void *)
c836dc19 1457{
d67620e4 1458 static __thread time_t last_stat, last_rootupdate, last_prune, last_secpoll;
8baca3fa 1459 static __thread int cleanCounter=0;
cc59bce6 1460 static __thread bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
1461 try {
1462 if(s_running)
1463 return;
1464 s_running=true;
3ddb9247 1465
cc59bce6 1466 struct timeval now;
1467 Utility::gettimeofday(&now, 0);
3ddb9247
PD
1468
1469 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 1470 DTime dt;
1471 dt.setTimeval(now);
1472 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
f8f243b0 1473 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numWorkerThreads);
3ddb9247 1474
f8f243b0 1475 pruneCollection(t_sstorage->negcache, ::arg().asNum("max-cache-entries") / (g_numWorkerThreads * 10), 200);
3ddb9247 1476
cc59bce6 1477 if(!((cleanCounter++)%40)) { // this is a full scan!
1478 time_t limit=now.tv_sec-300;
1479 for(SyncRes::nsspeeds_t::iterator i = t_sstorage->nsSpeeds.begin() ; i!= t_sstorage->nsSpeeds.end(); )
1480 if(i->second.stale(limit))
1481 t_sstorage->nsSpeeds.erase(i++);
1482 else
1483 ++i;
1484 }
1485 last_prune=time(0);
d67620e4 1486 }
3ddb9247 1487
cc59bce6 1488 if(now.tv_sec - last_rootupdate > 7200) {
1489 SyncRes sr(now);
1490 sr.setDoEDNS0(true);
e325f20c 1491 vector<DNSRecord> ret;
3ddb9247 1492
cc59bce6 1493 sr.setNoCache();
1494 int res=-1;
18b73338 1495 try {
6ed9a611 1496 res=sr.beginResolve(DNSName("."), QType(QType::NS), 1, ret);
cc59bce6 1497 }
3aa91c3e 1498 catch(PDNSException& e)
1499 {
1500 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
1501 }
1502
1503 catch(std::exception& e)
1504 {
1505 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
1506 }
1507
cc59bce6 1508 catch(...)
1509 {
1510 L<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
1511 }
1512 if(!res) {
1513 L<<Logger::Notice<<"Refreshed . records"<<endl;
1514 last_rootupdate=now.tv_sec;
1515 }
1516 else
1517 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1518 }
3ddb9247 1519
cc59bce6 1520 if(!t_id) {
3ddb9247 1521 if(now.tv_sec - last_stat >= 1800) {
cc59bce6 1522 doStats();
1523 last_stat=time(0);
1524 }
3ddb9247 1525
cc59bce6 1526 if(now.tv_sec - last_secpoll >= 3600) {
1527 try {
1528 doSecPoll(&last_secpoll);
1529 }
1530 catch(...) {}
18b73338 1531 }
d67620e4 1532 }
cc59bce6 1533 s_running=false;
d67620e4 1534 }
cc59bce6 1535 catch(PDNSException& ae)
1536 {
1537 s_running=false;
1538 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
1539 throw;
1540 }
779828c4 1541}
d6d5dea7 1542
49a699c4
BH
1543void makeThreadPipes()
1544{
c3828c03 1545 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
1546 struct ThreadPipeSet tps;
1547 int fd[2];
1548 if(pipe(fd) < 0)
1549 unixDie("Creating pipe for inter-thread communications");
3ddb9247 1550
49a699c4
BH
1551 tps.readToThread = fd[0];
1552 tps.writeToThread = fd[1];
3ddb9247 1553
49a699c4
BH
1554 if(pipe(fd) < 0)
1555 unixDie("Creating pipe for inter-thread communications");
1556 tps.readFromThread = fd[0];
1557 tps.writeFromThread = fd[1];
3ddb9247 1558
49a699c4
BH
1559 g_pipes.push_back(tps);
1560 }
1561}
1562
00c9b8c1
BH
1563struct ThreadMSG
1564{
1565 pipefunc_t func;
1566 bool wantAnswer;
1567};
1568
49a699c4
BH
1569void broadcastFunction(const pipefunc_t& func, bool skipSelf)
1570{
49a699c4 1571 unsigned int n = 0;
1dc8f4d0 1572 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
1573 {
1574 if(n++ == t_id) {
1575 if(!skipSelf)
1576 func(); // don't write to ourselves!
1577 continue;
1578 }
3ddb9247 1579
00c9b8c1
BH
1580 ThreadMSG* tmsg = new ThreadMSG();
1581 tmsg->func = func;
1582 tmsg->wantAnswer = true;
1583 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
49a699c4 1584 unixDie("write to thread pipe returned wrong size or error");
3ddb9247 1585
49a699c4
BH
1586 string* resp;
1587 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1588 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 1589
49a699c4
BH
1590 if(resp) {
1591// cerr <<"got response: " << *resp << endl;
1592 delete resp;
1593 }
1594 }
1595}
06ea9015 1596
2fafb640 1597static uint32_t g_disthashseed;
8171ab83 1598void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 1599{
8171ab83 1600 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 1601 unsigned int target = 1 + (hash % (g_pipes.size()-1));
1602
00c9b8c1
BH
1603 if(target == t_id) {
1604 func();
1605 return;
1606 }
3ddb9247 1607 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
1608 ThreadMSG* tmsg = new ThreadMSG();
1609 tmsg->func = func;
1610 tmsg->wantAnswer = false;
3ddb9247 1611
00c9b8c1 1612 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
3ddb9247 1613 unixDie("write to thread pipe returned wrong size or error");
00c9b8c1 1614}
3427fa8a 1615
49a699c4
BH
1616void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
1617{
00c9b8c1 1618 ThreadMSG* tmsg;
3ddb9247
PD
1619
1620 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
1621 unixDie("read from thread pipe returned wrong size or error");
1622 }
3ddb9247 1623
2f22827a 1624 void *resp=0;
1625 try {
1626 resp = tmsg->func();
1627 }
1628 catch(std::exception& e) {
1629 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
1630 }
1631 catch(PDNSException& e) {
1632 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
1633 }
00c9b8c1
BH
1634 if(tmsg->wantAnswer)
1635 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp))
1636 unixDie("write to thread pipe returned wrong size or error");
3ddb9247 1637
00c9b8c1 1638 delete tmsg;
49a699c4 1639}
09e6702a 1640
13034931
BH
1641template<class T> void *voider(const boost::function<T*()>& func)
1642{
1643 return func();
1644}
1645
b3b5459d
BH
1646vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
1647{
1648 a.insert(a.end(), b.begin(), b.end());
1649 return a;
1650}
1651
// Appends every (name, type) pair of `b` to `a` and returns `a`.
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
1657
3ddb9247
PD
1658vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
1659{
1660 a.insert(a.end(), b.begin(), b.end());
1661 return a;
1662}
1663
92011b8f 1664
13034931 1665template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
1666{
1667 unsigned int n = 0;
1668 T ret=T();
1dc8f4d0 1669 for(ThreadPipeSet& tps : g_pipes)
3427fa8a
BH
1670 {
1671 if(n++ == t_id) {
1672 if(!skipSelf) {
1673 T* resp = (T*)func(); // don't write to ourselves!
1674 if(resp) {
1675 //~ cerr <<"got direct: " << *resp << endl;
1676 ret += *resp;
1677 delete resp;
1678 }
1679 }
1680 continue;
1681 }
3ddb9247 1682
00c9b8c1
BH
1683 ThreadMSG* tmsg = new ThreadMSG();
1684 tmsg->func = boost::bind(voider<T>, func);
1685 tmsg->wantAnswer = true;
3ddb9247 1686
00c9b8c1 1687 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
3427fa8a 1688 unixDie("write to thread pipe returned wrong size or error");
3ddb9247
PD
1689
1690
3427fa8a
BH
1691 T* resp;
1692 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1693 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 1694
3427fa8a
BH
1695 if(resp) {
1696 //~ cerr <<"got response: " << *resp << endl;
1697 ret += *resp;
1698 delete resp;
1699 }
1700 }
1701 return ret;
1702}
1703
13034931
BH
1704template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
1705template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
b3b5459d 1706template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
3ddb9247 1707template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 1708
d8f6d49f 1709void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
1710{
1711 string remote;
1712 string msg=s_rcc.recv(&remote);
1713 RecursorControlParser rcp;
1714 RecursorControlParser::func_t* command;
3ddb9247 1715
09e6702a 1716 string answer=rcp.getAnswer(msg, &command);
ab5c053d
BH
1717 try {
1718 s_rcc.send(answer, &remote);
1719 command();
1720 }
fdbf35ac 1721 catch(std::exception& e) {
ab5c053d
BH
1722 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
1723 }
3f81d239 1724 catch(PDNSException& ae) {
ab5c053d
BH
1725 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
1726 }
09e6702a
BH
1727}
1728
d8f6d49f 1729void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1730{
0b18b22e 1731 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 1732 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 1733
667f7e60 1734 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 1735
705f31ae 1736 int ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 1737 if(ret > 0) {
667f7e60
BH
1738 pident->inMSG.append(&buffer[0], &buffer[ret]);
1739 pident->inNeeded-=ret;
825fa717 1740 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
1741 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
1742 PacketID pid=*pident;
1743 string msg=pident->inMSG;
3ddb9247 1744
bb4bdbaf 1745 t_fdm->removeReadFD(fd);
3ddb9247 1746 MT->sendEvent(pid, &msg);
09e6702a
BH
1747 }
1748 else {
667f7e60 1749 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
1750 }
1751 }
1752 else {
667f7e60 1753 PacketID tmp=*pident;
bb4bdbaf 1754 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
1755 string empty;
1756 MT->sendEvent(tmp, &empty); // this conveys error status
1757 }
1758}
1759
d8f6d49f 1760void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1761{
0b18b22e 1762 PacketID* pid=any_cast<PacketID>(&var);
4ca15bca 1763 int ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 1764 if(ret > 0) {
667f7e60
BH
1765 pid->outPos+=ret;
1766 if(pid->outPos==pid->outMSG.size()) {
1767 PacketID tmp=*pid;
bb4bdbaf 1768 t_fdm->removeWriteFD(fd);
09e6702a
BH
1769 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
1770 }
1771 }
1772 else { // error or EOF
667f7e60 1773 PacketID tmp(*pid);
bb4bdbaf 1774 t_fdm->removeWriteFD(fd);
09e6702a 1775 string sent;
998a4334 1776 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
1777 }
1778}
1779
34801ab1
BH
1780// resend event to everybody chained onto it
1781void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
1782{
1783 if(iter->key.chain.empty())
1784 return;
e27e91a8 1785 // cerr<<"doResends called!\n";
34801ab1
BH
1786 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
1787 resend.fd=-1;
1788 resend.id=*i;
e27e91a8 1789 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 1790
34801ab1
BH
1791 MT->sendEvent(resend, &content);
1792 g_stats.chainResends++;
34801ab1
BH
1793 }
1794}
1795
d8f6d49f 1796void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1797{
600fc20b 1798 PacketID pid=any_cast<PacketID>(var);
998a4334 1799 int len;
e45beeda 1800 char data[g_outgoingEDNSBufsize];
996c89cc 1801 ComboAddress fromaddr;
09e6702a
BH
1802 socklen_t addrlen=sizeof(fromaddr);
1803
998a4334 1804 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 1805
998a4334
BH
1806 if(len < (int)sizeof(dnsheader)) {
1807 if(len < 0)
996c89cc 1808 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 1809 else {
3ddb9247 1810 g_stats.serverParseError++;
09e6702a 1811 if(g_logCommonErrors)
85db02c5 1812 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 1813 ": packet smaller than DNS header"<<endl;
998a4334 1814 }
34801ab1 1815
49a699c4 1816 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
1817 string empty;
1818
1819 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 1820 if(iter != MT->d_waiters.end())
34801ab1 1821 doResends(iter, pid, empty);
3ddb9247 1822
34801ab1 1823 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 1824 return;
3ddb9247 1825 }
998a4334
BH
1826
1827 dnsheader dh;
1828 memcpy(&dh, data, sizeof(dh));
3ddb9247 1829
6da3b3ad
PD
1830 PacketID pident;
1831 pident.remote=fromaddr;
1832 pident.id=dh.id;
1833 pident.fd=fd;
34801ab1 1834
33a928af 1835 if(!dh.qr && g_logCommonErrors) {
854d44e3 1836 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
1837 }
1838
1839 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
1840 !dh.qr) { // one weird server
1841 pident.domain.clear();
1842 pident.type = 0;
1843 }
1844 else {
1845 try {
8171ab83 1846 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
1847 }
1848 catch(std::exception& e) {
1849 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
1850 L<<Logger::Warning<<"Error in packet from "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
1851 return;
34801ab1 1852 }
6da3b3ad
PD
1853 }
1854 string packet;
1855 packet.assign(data, len);
34801ab1 1856
6da3b3ad
PD
1857 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
1858 if(iter != MT->d_waiters.end()) {
1859 doResends(iter, pident, packet);
1860 }
c1da7976 1861
6da3b3ad 1862retryWithName:
4957a608 1863
6da3b3ad
PD
1864 if(!MT->sendEvent(pident, &packet)) {
1865 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
1866 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
1867 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 1868 pident.domain == mthread->key.domain) {
6da3b3ad 1869 mthread->key.nearMisses++;
998a4334 1870 }
6da3b3ad
PD
1871
1872 // be a bit paranoid here since we're weakening our matching
3ddb9247 1873 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
1874 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
1875 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
1876 pident.domain = mthread->key.domain;
1877 pident.type = mthread->key.type;
1878 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 1879 }
09e6702a 1880 }
6da3b3ad
PD
1881 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
1882 if(g_logCommonErrors) {
1883 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<<pident.domain<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 1884 }
09e6702a 1885 }
6da3b3ad
PD
1886 else if(fd >= 0) {
1887 t_udpclientsocks->returnSocket(fd);
1888 }
09e6702a
BH
1889}
1890
1f4abb20
BH
1891FDMultiplexer* getMultiplexer()
1892{
1893 FDMultiplexer* ret;
1894 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
1895 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
1896 try {
1897 ret=i->second();
1f4abb20
BH
1898 return ret;
1899 }
98d0ee4a 1900 catch(FDMultiplexerException &fe) {
0a7f24cb 1901 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
1902 }
1903 catch(...) {
1904 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
1905 }
1f4abb20
BH
1906 }
1907 L<<Logger::Error<<"No working multiplexer found!"<<endl;
1908 exit(1);
1909}
1910
3ddb9247 1911
0f39c1a3 1912string* doReloadLuaScript()
4485aa35 1913{
674cf0f6 1914 string fname= ::arg()["lua-dns-script"];
4485aa35 1915 try {
674cf0f6
BH
1916 if(fname.empty()) {
1917 t_pdl->reset();
1918 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 1919 return new string("unloaded\n");
4485aa35
BH
1920 }
1921 else {
5704e107 1922 *t_pdl = shared_ptr<RecursorLua>(new RecursorLua(fname));
4485aa35
BH
1923 }
1924 }
fdbf35ac 1925 catch(std::exception& e) {
674cf0f6 1926 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 1927 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 1928 }
3ddb9247 1929
674cf0f6 1930 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 1931 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
1932}
1933
49a699c4
BH
1934string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
1935{
3ddb9247 1936 if(begin != end)
49a699c4 1937 ::arg().set("lua-dns-script") = *begin;
3ddb9247 1938
0f39c1a3 1939 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 1940}
49a699c4 1941
77499b05
BH
1942string* pleaseUseNewTraceRegex(const std::string& newRegex)
1943try
1944{
1945 if(newRegex.empty()) {
1946 t_traceRegex->reset();
1947 return new string("unset\n");
1948 }
1949 else {
1950 (*t_traceRegex) = shared_ptr<Regex>(new Regex(newRegex));
1951 return new string("ok\n");
1952 }
1953}
3f81d239 1954catch(PDNSException& ae)
77499b05
BH
1955{
1956 return new string(ae.reason+"\n");
1957}
1958
1959string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
1960{
1961 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
1962}
1963
4e9a20e6 1964static void checkLinuxIPv6Limits()
1965{
1966#ifdef __linux__
1967 string line;
1968 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
1969 int lim=atoi(line.c_str());
1970 if(lim < 16384) {
36849ff2 1971 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 1972 }
1973 }
1974#endif
1975}
36849ff2 1976static void checkOrFixFDS()
4e9a20e6 1977{
f8f243b0 1978 unsigned int availFDs=getFilenumLimit()-10; // some healthy margin, thanks AJ ;-)
1979 if(g_maxMThreads * g_numWorkerThreads > availFDs) {
1980 if(getFilenumLimit(true) >= g_maxMThreads * g_numWorkerThreads) {
1981 setFilenumLimit(g_maxMThreads * g_numWorkerThreads);
1982 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<g_maxMThreads * g_numWorkerThreads<<" to match max-mthreads and threads settings"<<endl;
36849ff2 1983 }
1984 else {
f8f243b0 1985 int newval = getFilenumLimit(true) / g_numWorkerThreads;
1986 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<availFDs<<" < "<<g_maxMThreads*g_numWorkerThreads<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 1987 g_maxMThreads = newval;
f8f243b0 1988 setFilenumLimit(g_maxMThreads * g_numWorkerThreads);
36849ff2 1989 }
1990 }
4e9a20e6 1991}
77499b05 1992
bb4bdbaf 1993void* recursorThread(void*);
51e2144e 1994
3427fa8a 1995void* pleaseSupplantACLs(NetmaskGroup *ng)
49a699c4
BH
1996{
1997 t_allowFrom = ng;
3427fa8a 1998 return 0;
49a699c4
BH
1999}
2000
dbd23fc2
BH
// Copy of the command line given to main(), kept so parseACLs() can re-parse
// command line settings when the configuration is reloaded.
int g_argc = 0;
char** g_argv = nullptr;
2003
18af64a8 2004void parseACLs()
f7c1d4e3 2005{
18af64a8 2006 static bool l_initialized;
3ddb9247 2007
49a699c4 2008 if(l_initialized) { // only reload configuration file on second call
18af64a8
BH
2009 string configname=::arg()["config-dir"]+"/recursor.conf";
2010 cleanSlashes(configname);
3ddb9247
PD
2011
2012 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 2013 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 2014 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 2015 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
2016 ::arg().preParse(g_argc, g_argv, "include-dir");
2017
2018 // then process includes
2019 std::vector<std::string> extraConfigs;
242b90e1
AT
2020 ::arg().gatherIncludes(extraConfigs);
2021
1dc8f4d0 2022 for(const std::string& fn : extraConfigs) {
7e818521 2023 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2024 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2025 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2026 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2027 }
ca2c884c
AT
2028
2029 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2030 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2031 }
49a699c4
BH
2032
2033 NetmaskGroup* oldAllowFrom = t_allowFrom, *allowFrom=new NetmaskGroup;
3ddb9247 2034
2c95fc65
BH
2035 if(!::arg()["allow-from-file"].empty()) {
2036 string line;
2c95fc65
BH
2037 ifstream ifs(::arg()["allow-from-file"].c_str());
2038 if(!ifs) {
3ddb9247 2039 delete allowFrom;
9c61b9d0 2040 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2041 }
2042
2043 string::size_type pos;
2044 while(getline(ifs,line)) {
2045 pos=line.find('#');
2046 if(pos!=string::npos)
2047 line.resize(pos);
2048 trim(line);
2049 if(line.empty())
2050 continue;
2051
18af64a8 2052 allowFrom->addMask(line);
2c95fc65 2053 }
49a699c4 2054 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2055 }
2056 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2057 vector<string> ips;
2058 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2059
f7c1d4e3
BH
2060 L<<Logger::Warning<<"Only allowing queries from: ";
2061 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2062 allowFrom->addMask(*i);
f7c1d4e3 2063 if(i!=ips.begin())
674cf0f6 2064 L<<Logger::Warning<<", ";
f7c1d4e3
BH
2065 L<<Logger::Warning<<*i;
2066 }
2067 L<<Logger::Warning<<endl;
2068 }
49a699c4 2069 else {
3ddb9247 2070 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
49a699c4
BH
2071 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2072 delete allowFrom;
2073 allowFrom = 0;
2074 }
3ddb9247 2075
49a699c4 2076 g_initialAllowFrom = allowFrom;
d7dae798 2077 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
49a699c4 2078 delete oldAllowFrom;
3ddb9247 2079
49a699c4 2080 l_initialized = true;
18af64a8
BH
2081}
2082
795215f2 2083boost::optional<Netmask> getEDNSSubnetMask(const ComboAddress& local, const DNSName&dn, const ComboAddress& rem)
2084{
2085 if(local.sin4.sin_family != AF_INET || local.sin4.sin_addr.s_addr) { // detect unset 'requestor'
2086 if(g_ednsdomains.check(dn) || g_ednssubnets.match(rem)) {
2087 int bits =local.sin4.sin_family == AF_INET ? 24 : 64;
2088 ComboAddress trunc(local);
2089 trunc.truncate(bits);
2090 return boost::optional<Netmask>(Netmask(trunc, bits));
2091 }
2092 }
2093 return boost::optional<Netmask>();
2094}
2095
2096void parseEDNSSubnetWhitelist(const std::string& wlist)
2097{
2098 vector<string> parts;
39588f55 2099 stringtok(parts, wlist, ",; ");
795215f2 2100 for(const auto& a : parts) {
2101 try {
2102 Netmask nm(a);
2103 g_ednssubnets.addMask(nm);
2104 }
2105 catch(...) {
2106 g_ednsdomains.add(DNSName(a));
2107 }
2108 }
2109}
2110
756e82cf 2111SuffixMatchNode g_delegationOnly;
2112static void setupDelegationOnly()
2113{
2114 vector<string> parts;
2115 stringtok(parts, ::arg()["delegation-only"], ", \t");
2116 for(const auto& p : parts) {
2117 g_delegationOnly.add(DNSName(p));
2118 }
2119}
795215f2 2120
18af64a8
BH
2121int serviceMain(int argc, char*argv[])
2122{
5124de27 2123 L.setName(s_programname);
18af64a8
BH
2124 L.setLoglevel((Logger::Urgency)(6)); // info and up
2125
2126 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
2127 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2128 if(val >= 0)
2129 theL().setFacility(val);
18af64a8
BH
2130 else
2131 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2132 }
2133
ba1a571d 2134 showProductVersion();
18af64a8 2135 seedRandom(::arg()["entropy-source"]);
06ea9015 2136 g_disthashseed=dns_random(0xffffffff);
2137
18af64a8 2138 parseACLs();
92011b8f 2139 sortPublicSuffixList();
2140
eb5bae86
BH
2141 if(!::arg()["dont-query"].empty()) {
2142 g_dontQuery=new NetmaskGroup;
2143 vector<string> ips;
2144 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
2145 ips.push_back("0.0.0.0");
2146 ips.push_back("::");
c36bc97a 2147
eb5bae86
BH
2148 L<<Logger::Warning<<"Will not send queries to: ";
2149 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2150 g_dontQuery->addMask(*i);
2151 if(i!=ips.begin())
4957a608 2152 L<<Logger::Warning<<", ";
eb5bae86
BH
2153 L<<Logger::Warning<<*i;
2154 }
2155 L<<Logger::Warning<<endl;
2156 }
2157
f7c1d4e3 2158 g_quiet=::arg().mustDo("quiet");
3ddb9247 2159
1bc3c142
BH
2160 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2161 if(g_weDistributeQueries) {
2162 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2163 }
3ddb9247 2164
756e82cf 2165 setupDelegationOnly();
b33c2462 2166 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 2167
77499b05
BH
2168 if(::arg()["trace"]=="fail") {
2169 SyncRes::setDefaultLogMode(SyncRes::Store);
2170 }
2171 else if(::arg().mustDo("trace")) {
2172 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
2173 ::arg().set("quiet")="no";
2174 g_quiet=false;
2175 }
3ddb9247 2176
aadceba8 2177 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2178
4e9a20e6 2179 checkLinuxIPv6Limits();
5a38281c 2180 try {
3ddb9247 2181 vector<string> addrs;
5a38281c
BH
2182 if(!::arg()["query-local-address6"].empty()) {
2183 SyncRes::s_doIPv6=true;
d4fb76e9 2184 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3ddb9247 2185
5a38281c 2186 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
1dc8f4d0 2187 for(const string& addr : addrs) {
4957a608 2188 g_localQueryAddresses6.push_back(ComboAddress(addr));
5a38281c
BH
2189 }
2190 }
d4fb76e9
BH
2191 else {
2192 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2193 }
5a38281c
BH
2194 addrs.clear();
2195 stringtok(addrs, ::arg()["query-local-address"], ", ;");
1dc8f4d0 2196 for(const string& addr : addrs) {
5a38281c
BH
2197 g_localQueryAddresses4.push_back(ComboAddress(addr));
2198 }
2199 }
2200 catch(std::exception& e) {
2201 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2202 exit(99);
f7c1d4e3 2203 }
f555e92e 2204
1051f8a9
BH
2205 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2206
f7c1d4e3 2207 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
c3e753c7 2208 SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
1051f8a9
BH
2209 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2210 SyncRes::s_packetcacheservfailttl=::arg().asNum("packetcache-servfail-ttl");
628e2c7b
PA
2211 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2212 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 2213 SyncRes::s_serverID=::arg()["server-id"];
173d790e 2214 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 2215 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
01402d56 2216 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
2217 if(SyncRes::s_serverID.empty()) {
2218 char tmp[128];
2219 gethostname(tmp, sizeof(tmp)-1);
2220 SyncRes::s_serverID=tmp;
2221 }
3ddb9247 2222
5b0ddd18 2223 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 2224
49a699c4 2225 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 2226
08f3f638 2227 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 2228
f7c1d4e3 2229 g_logCommonErrors=::arg().mustDo("log-common-errors");
e661a20b
PD
2230
2231 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
2232 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2233
f7c1d4e3
BH
2234 makeUDPServerSockets();
2235 makeTCPServerSockets();
815099b2 2236
376effcf 2237 parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
2238
677e2a46
BH
2239 int forks;
2240 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
2241 if(!fork()) // we are child
2242 break;
2243 }
3ddb9247 2244
2d733c0f 2245 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
815099b2 2246 if(!s_pidfname.empty())
3ddb9247
PD
2247 unlink(s_pidfname.c_str()); // remove possible old pid file
2248
644dd1da 2249 loadRPZFiles();
2250
f7c1d4e3
BH
2251 if(::arg().mustDo("daemon")) {
2252 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
2253 L.toConsole(Logger::Critical);
f7c1d4e3
BH
2254 daemonize();
2255 }
2256 signal(SIGUSR1,usr1Handler);
2257 signal(SIGUSR2,usr2Handler);
2258 signal(SIGPIPE,SIG_IGN);
2259 writePid();
677e2a46 2260 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
a6414fdc
AT
2261 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
2262 g_maxMThreads = ::arg().asNum("max-mthreads");
2263 checkOrFixFDS();
3ddb9247 2264
644dd1da 2265
2266
138435cb
BH
2267 int newgid=0;
2268 if(!::arg()["setgid"].empty())
2269 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
2270 int newuid=0;
2271 if(!::arg()["setuid"].empty())
2272 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
2273
f1d6a7ce
KM
2274 Utility::dropGroupPrivs(newuid, newgid);
2275
138435cb
BH
2276 if (!::arg()["chroot"].empty()) {
2277 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
2278 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
2279 exit(1);
2280 }
2281 }
2282
f1d6a7ce 2283 Utility::dropUserPrivs(newuid);
f8f243b0 2284 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
2285 g_numWorkerThreads = ::arg().asNum("threads");
49a699c4 2286 makeThreadPipes();
3ddb9247 2287
5d4dd7fe
BH
2288 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2289 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
343257a4 2290
c3828c03 2291 if(g_numThreads == 1) {
76698c6e 2292 L<<Logger::Warning<<"Operating unthreaded"<<endl;
76698c6e
BH
2293 recursorThread(0);
2294 }
2295 else {
2296 pthread_t tid;
c3828c03
BH
2297 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
2298 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 2299 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
76698c6e
BH
2300 }
2301 void* res;
49a699c4 2302
3ddb9247 2303
76698c6e 2304 pthread_join(tid, &res);
bb4bdbaf 2305 }
bb4bdbaf
BH
2306 return 0;
2307}
2308
2309void* recursorThread(void* ptr)
2310try
2311{
2e2cd8ec 2312 t_id=(int) (long) ptr;
49a699c4 2313 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
ac0e821b 2314 t_sstorage->domainmap = g_initialDomainMap;
49a699c4
BH
2315 t_allowFrom = g_initialAllowFrom;
2316 t_udpclientsocks = new UDPClientSocks();
bd0289fc 2317 t_tcpClientCounts = new tcpClientCounts_t();
49a699c4 2318 primeHints();
3ddb9247 2319
49a699c4 2320 t_packetCache = new RecursorPacketCache();
3ddb9247 2321
49a699c4 2322 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 2323
5704e107 2324 t_pdl = new shared_ptr<RecursorLua>();
3ddb9247 2325
674cf0f6
BH
2326 try {
2327 if(!::arg()["lua-dns-script"].empty()) {
5704e107 2328 *t_pdl = shared_ptr<RecursorLua>(new RecursorLua(::arg()["lua-dns-script"]));
674cf0f6
BH
2329 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
2330 }
674cf0f6
BH
2331 }
2332 catch(std::exception &e) {
2333 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 2334 _exit(99);
674cf0f6 2335 }
3ddb9247 2336
77499b05 2337 t_traceRegex = new shared_ptr<Regex>();
f8f243b0 2338 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 2339 if(ringsize) {
60c8afa8 2340 t_remotes = new addrringbuf_t();
f8f243b0 2341 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 2342 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 2343 else
3ddb9247 2344 t_remotes->set_capacity(ringsize);
60c8afa8 2345 t_servfailremotes = new addrringbuf_t();
3ddb9247 2346 t_servfailremotes->set_capacity(ringsize);
60c8afa8 2347 t_largeanswerremotes = new addrringbuf_t();
3ddb9247 2348 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 2349
c5c066bf 2350 t_queryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2351 t_queryring->set_capacity(ringsize);
c5c066bf 2352 t_servfailqueryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2353 t_servfailqueryring->set_capacity(ringsize);
92011b8f 2354 }
3ddb9247 2355
bb4bdbaf 2356 MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
3ddb9247 2357
bb4bdbaf
BH
2358 PacketID pident;
2359
2360 t_fdm=getMultiplexer();
f3d1d67b 2361 if(!t_id) {
30a1aa92 2362 if(::arg().mustDo("experimental-webserver")) {
2363 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 2364 try {
1ce57618 2365 new RecursorWebServer(t_fdm);
8989097d
CH
2366 }
2367 catch(PDNSException &e) {
2368 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
2369 exit(99);
2370 }
f3d1d67b 2371 }
83252304 2372 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 2373 }
83252304 2374
49a699c4 2375 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 2376
1bc3c142 2377 if(!g_weDistributeQueries || !t_id) // if we distribute queries, only t_id = 0 listens
3ddb9247 2378 for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i)
1bc3c142 2379 t_fdm->addReadFD(i->first, i->second);
3ddb9247 2380
674cf0f6 2381 if(!t_id) {
674cf0f6
BH
2382 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
2383 }
1bc3c142 2384
f7c1d4e3 2385 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 2386
f7c1d4e3 2387 bool listenOnTCP(true);
49a699c4 2388
2c78bd57 2389 time_t last_carbon=0;
2390 time_t carbonInterval=::arg().asNum("carbon-interval");
cc59bce6 2391 counter=AtomicCounter(0); // used to periodically execute certain tasks
f7c1d4e3 2392 for(;;) {
ac0e821b 2393 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 2394
3427fa8a
BH
2395 if(!(counter%500)) {
2396 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
2397 }
2398
d2392145 2399 if(!(counter%55)) {
d8f6d49f 2400 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 2401 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 2402
f7c1d4e3 2403 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 2404 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 2405 if(g_logCommonErrors)
cd989c87 2406 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 2407 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
2408 }
2409 }
3ddb9247 2410
f7c1d4e3
BH
2411 counter++;
2412
3427fa8a 2413 if(!t_id && statsWanted) {
f7c1d4e3
BH
2414 doStats();
2415 }
2416
2417 Utility::gettimeofday(&g_now, 0);
2c78bd57 2418
2419 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
2420 MT->makeThread(doCarbonDump, 0);
2421 last_carbon = g_now.tv_sec;
2422 }
2423
bb4bdbaf 2424 t_fdm->run(&g_now);
3ea54bf0 2425 // 'run' updates g_now for us
f7c1d4e3 2426
b8ef5c5c 2427 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 2428 if(listenOnTCP) {
2429 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
2430 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2431 t_fdm->removeReadFD(*i);
2432 listenOnTCP=false;
2433 }
f7c1d4e3 2434 }
5c889cf5 2435 else {
2436 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
2437 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2438 t_fdm->addReadFD(*i, handleNewTCPQuestion);
2439 listenOnTCP=true;
2440 }
f7c1d4e3
BH
2441 }
2442 }
2443 }
2444}
3f81d239 2445catch(PDNSException &ae) {
bb4bdbaf
BH
2446 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2447 return 0;
2448}
2449catch(std::exception &e) {
2450 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2451 return 0;
2452}
2453catch(...) {
2454 L<<Logger::Error<<"any other exception in main: "<<endl;
2455 return 0;
2456}
2457
51e2144e 2458
3ddb9247 2459int main(int argc, char **argv)
288f4aa9 2460{
dbd23fc2
BH
2461 g_argc = argc;
2462 g_argv = argv;
5e3de507 2463 g_stats.startupTime=time(0);
3e135495 2464 versionSetProduct(ProductRecursor);
8a63d3ce 2465 reportBasicTypes();
0007c2e5 2466 reportOtherTypes();
ea634573 2467
22030c37 2468 int ret = EXIT_SUCCESS;
caa6eefa 2469
288f4aa9 2470 try {
f888311c 2471 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 2472 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 2473 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 2474 ::arg().set("local-port","port to listen on")="53";
32252594 2475 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 2476 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 2477 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
2e3d8a19 2478 ::arg().set("daemon","Operate as a daemon")="yes";
191f2e47 2479 ::arg().setSwitch("write-pid","Write a PID file")="yes";
34162f8f 2480 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="4";
0e9d9ce2 2481 ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2e3d8a19
BH
2482 ::arg().set("chroot","switch to chroot jail")="";
2483 ::arg().set("setgid","If set, change group id to this gid for more security")="";
2484 ::arg().set("setuid","If set, change user id to this uid for more security")="";
5b0ddd18 2485 ::arg().set("network-timeout", "Wait this nummer of milliseconds for network i/o")="1500";
bb4bdbaf 2486 ::arg().set("threads", "Launch this number of threads")="2";
1bc3c142 2487 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1";
5124de27 2488 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3ddb9247 2489 ::arg().set( "experimental-logfile", "Filename of the log file for JSON parser" )= "/var/log/pdns.log";
88d77d73
CH
2490 ::arg().setSwitch("experimental-webserver", "Start a webserver for monitoring") = "no";
2491 ::arg().set("experimental-webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
2492 ::arg().set("experimental-webserver-port", "Port of webserver to listen on") = "8082";
2493 ::arg().set("experimental-webserver-password", "Password required for accessing the webserver") = "";
69e7f117 2494 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
c348c0c8 2495 ::arg().set("experimental-api-config-dir", "Directory where REST API stores config and zones") = "";
bbef8f04 2496 ::arg().set("experimental-api-key", "REST API Static authentication key (required for API use)") = "";
cc08b5a9 2497 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
2c78bd57 2498 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server")="";
2499 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
b4ae7322 2500 ::arg().set("experimental-api-readonly", "If the JSON API should disallow data modification") = "no";
c038218b 2501 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 2502 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 2503 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
2504 ::arg().set("socket-owner","Owner of socket")="";
2505 ::arg().set("socket-group","Group of socket")="";
2506 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 2507
2e3d8a19
BH
2508 ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
2509 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
2510 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 2511 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 2512 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 2513 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 2514 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 2515 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 2516 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 2517 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 2518 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 2519 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 2520 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 2521 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 2522 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 2523 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
7f7b8d55 2524 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
92011b8f 2525 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 2526 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 2527 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 2528 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 2529 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 2530 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 2531 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
0d5f0a9f 2532 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 2533 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 2534 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
644dd1da 2535 ::arg().set("rpz-files", "RPZ files to load in order, domain or domain=policy pairs separated by commas")="";
39ec5d29 2536 ::arg().set("rpz-masters", "RPZ master servers, address:name pairs separated by commas")="";
644dd1da 2537
5605c067 2538 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
2539 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
2540 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 2541 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 2542 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 2543 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
9bc8c14c 2544 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
4485aa35 2545 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 2546 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247
PD
2547// ::arg().setSwitch( "disable-edns-ping", "Disable EDNSPing - EXPERIMENTAL, LEAVE DISABLED" )= "no";
2548 ::arg().setSwitch( "disable-edns", "Disable EDNS - EXPERIMENTAL, LEAVE DISABLED" )= "";
2549 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
376effcf 2550 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="powerdns.com,82.94.213.34,2001:888:2000:1d::2";
966d3ba8 2551 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="";
cd6310a8 2552 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="no";
e661a20b 2553 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
a09a8ce0 2554 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 2555 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 2556 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 2557 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 2558 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
a09a8ce0 2559
68e6df3c 2560 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 2561 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2e3d8a19
BH
2562
2563 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 2564 ::arg().setCmd("version","Print version string");
d5141417 2565 ::arg().setCmd("config","Output blank configuration");
f27e6356 2566 L.toConsole(Logger::Info);
2e3d8a19 2567 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 2568
2d733c0f
CH
2569 string configname=::arg()["config-dir"]+"/recursor.conf";
2570 if(::arg()["config-name"]!="") {
2571 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 2572 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
2573 }
2574 cleanSlashes(configname);
5124de27 2575
577cf284
BH
2576 if(::arg().mustDo("config")) {
2577 cout<<::arg().configstring()<<endl;
2578 exit(0);
2579 }
2580
3ddb9247 2581 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
2582 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
2583
2e3d8a19 2584 ::arg().parse(argc,argv);
c836dc19 2585
2e3d8a19 2586 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 2587
61d74169 2588 if(::arg().asNum("threads")==1)
2589 ::arg().set("pdns-distributes-queries")="no";
2590
2e3d8a19 2591 if(::arg().mustDo("help")) {
ff5ba4f9
WA
2592 cout<<"syntax:"<<endl<<endl;
2593 cout<<::arg().helpstring(::arg()["help"])<<endl;
2594 exit(0);
b636533b 2595 }
5e3de507 2596 if(::arg().mustDo("version")) {
ba1a571d 2597 showProductVersion();
3613a51c 2598 showBuildConfiguration();
5e3de507
BH
2599 exit(99);
2600 }
b636533b 2601
34162f8f 2602 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 2603
34162f8f
CH
2604 if (logUrgency < Logger::Error)
2605 logUrgency = Logger::Error;
f48d7b65 2606 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
2607 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
2608 }
34162f8f
CH
2609 L.setLoglevel(logUrgency);
2610 L.toConsole(logUrgency);
2611
f7c1d4e3 2612 serviceMain(argc, argv);
288f4aa9 2613 }
3f81d239 2614 catch(PDNSException &ae) {
c836dc19 2615 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 2616 ret=EXIT_FAILURE;
288f4aa9 2617 }
fdbf35ac 2618 catch(std::exception &e) {
c836dc19 2619 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 2620 ret=EXIT_FAILURE;
288f4aa9
BH
2621 }
2622 catch(...) {
c836dc19 2623 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 2624 ret=EXIT_FAILURE;
288f4aa9 2625 }
3ddb9247 2626
22030c37 2627 return ret;
288f4aa9 2628}