]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Merge pull request #1968 from pieterlexis/port-faq
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
4e9a20e6 3 Copyright (C) 2003 - 2014 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
f28307ad
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
288f4aa9 8
f782fe38
MH
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
288f4aa9
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
288f4aa9 21*/
caa6eefa 22
76473b92
KM
23#include <netdb.h>
24#include <sys/stat.h>
25#include <unistd.h>
5a38281c 26#include <boost/foreach.hpp>
2470b36e 27#include "ws-recursor.hh"
49a699c4 28#include <pthread.h>
3ea54bf0 29#include "recpacketcache.hh"
caa6eefa 30#include "utility.hh"
51e2144e 31#include "dns_random.hh"
288f4aa9
BH
32#include <iostream>
33#include <errno.h>
81859ba5 34#include <boost/static_assert.hpp>
288f4aa9
BH
35#include <map>
36#include <set>
97bb160b 37#include "recursor_cache.hh"
38c9ceaa 38#include "cachecleaner.hh"
288f4aa9 39#include <stdio.h>
c75a6a9e 40#include <signal.h>
288f4aa9 41#include <stdlib.h>
bb4bdbaf 42#include "misc.hh"
288f4aa9
BH
43#include "mtasker.hh"
44#include <utility>
288f4aa9
BH
45#include "arguments.hh"
46#include "syncres.hh"
88def049
BH
47#include <fcntl.h>
48#include <fstream>
5c633640
BH
49#include "sstuff.hh"
50#include <boost/tuple/tuple.hpp>
51#include <boost/tuple/tuple_comparison.hpp>
72df400f 52#include <boost/shared_array.hpp>
ea634573 53#include <boost/lexical_cast.hpp>
7f1fa77d 54#include <boost/function.hpp>
5605c067 55#include <boost/algorithm/string.hpp>
40a3dd64 56#include <netinet/tcp.h>
ea634573
BH
57#include "dnsparser.hh"
58#include "dnswriter.hh"
59#include "dnsrecords.hh"
f814d7c8 60#include "zoneparser-tng.hh"
1d5b3ce6 61#include "rec_channel.hh"
aaacf7f2 62#include "logger.hh"
c8ddb7c2 63#include "iputils.hh"
09e6702a 64#include "mplexer.hh"
c038218b 65#include "config.h"
5704e107 66#include "lua-recursor.hh"
ba1a571d 67#include "version.hh"
79332bff 68#include "responsestats.hh"
d67620e4 69#include "secpoll-recursor.hh"
a2bfc3ff
BH
70#ifndef RECURSOR
71#include "statbag.hh"
72StatBag S;
73#endif
74
bb4bdbaf 75__thread FDMultiplexer* t_fdm;
674cf0f6 76__thread unsigned int t_id;
09e6702a 77unsigned int g_maxTCPPerClient;
5b0ddd18 78unsigned int g_networkTimeoutMsec;
08f3f638 79uint64_t g_latencyStatSize;
09e6702a 80bool g_logCommonErrors;
e661a20b 81bool g_anyToTcp;
a09a8ce0 82uint16_t g_udpTruncationThreshold;
5704e107 83__thread shared_ptr<RecursorLua>* t_pdl;
b3b5459d 84__thread RemoteKeeper* t_remotes;
77499b05 85__thread shared_ptr<Regex>* t_traceRegex;
674cf0f6 86
d7dae798
BH
87RecursorControlChannel s_rcc; // only active in thread 0
88
// for communicating with our threads
// Plain bundle of four file descriptors; one instance per entry of g_pipes.
// NOTE(review): the names suggest two pipes -- one carrying data *to* the
// thread and one carrying data back *from* it -- confirm against the code
// that fills g_pipes.
struct ThreadPipeSet
{
  int writeToThread;    // presumably the write end of the "to thread" pipe
  int readToThread;     // presumably the read end of the "to thread" pipe
  int writeFromThread;  // presumably the write end of the "from thread" pipe
  int readFromThread;   // presumably the read end of the "from thread" pipe
};
3ea54bf0 97
d7dae798 98vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
5c633640 99
d7dae798 100SyncRes::domainmap_t* g_initialDomainMap; // new threads needs this to be setup
49a699c4
BH
101
102#include "namespaces.hh"
3ea54bf0 103
49a699c4 104__thread MemRecursorCache* t_RC;
16beeaa4 105__thread RecursorPacketCache* t_packetCache;
1d5b3ce6
BH
106RecursorStats g_stats;
107bool g_quiet;
49a699c4 108
1bc3c142
BH
109bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
110
41942bb3 111__thread NetmaskGroup* t_allowFrom;
49a699c4
BH
112static NetmaskGroup* g_initialAllowFrom; // new thread needs to be setup with this
113
eb5bae86 114NetmaskGroup* g_dontQuery;
2d733c0f 115string s_programname="pdns_recursor";
49a699c4 116
40a3dd64
BH
117typedef vector<int> tcpListenSockets_t;
118tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
3159c9ef 119int g_tcpTimeout;
85c32340 120unsigned int g_maxMThreads;
d7dae798 121struct timeval g_now; // timestamp, updated (too) frequently
84433b79 122typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
123listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
18af64a8 124
d7dae798
BH
125__thread MT_t* MT; // the big MTasker
126
c3828c03
BH
127unsigned int g_numThreads;
128
12cd44ee 129#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
130// Bad Nets taken from both:
131// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
132// and
133// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
134// where such a network may not be considered a valid destination
135#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
136#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 137
d7dae798 138//! used to send information to a newborn mthread
ea634573 139struct DNSComboWriter {
3ea54bf0 140 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now),
232f0877 141 d_tcp(false), d_socket(-1)
ea634573
BH
142 {}
143 MOADNSParser d_mdp;
00c9b8c1 144 void setRemote(const ComboAddress* sa)
ea634573 145 {
37d3f960 146 d_remote=*sa;
ea634573
BH
147 }
148
149 void setSocket(int sock)
150 {
151 d_socket=sock;
152 }
a1754c6a
BH
153
154 string getRemote() const
155 {
37d3f960 156 return d_remote.toString();
a1754c6a
BH
157 }
158
c9e9e5e0 159 struct timeval d_now;
37d3f960 160 ComboAddress d_remote;
ea634573
BH
161 bool d_tcp;
162 int d_socket;
cd989c87 163 shared_ptr<TCPConnection> d_tcpConnection;
ea634573
BH
164};
165
166
288f4aa9
BH
167ArgvMap &arg()
168{
169 static ArgvMap theArg;
170 return theArg;
171}
4ef015cd 172
09e6702a 173
d8f6d49f 174void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 175
50c81227 176// -1 is error, 0 is timeout, 1 is success
5c633640
BH
177int asendtcp(const string& data, Socket* sock)
178{
179 PacketID pident;
180 pident.sock=sock;
181 pident.outMSG=data;
23db0a09 182
bb4bdbaf 183 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 184 string packet;
5c633640 185
5b0ddd18 186 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 187
9170fbaf 188 if(!ret || ret==-1) { // timeout
bb4bdbaf 189 t_fdm->removeWriteFD(sock->getHandle());
5c633640 190 }
50c81227
BH
191 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
192 return -1;
193 }
9170fbaf 194 return ret;
5c633640
BH
195}
196
d8f6d49f 197void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 198
9170fbaf 199// -1 is error, 0 is timeout, 1 is success
825fa717 200int arecvtcp(string& data, int len, Socket* sock, bool incompleteOkay)
288f4aa9 201{
50c81227 202 data.clear();
5c633640
BH
203 PacketID pident;
204 pident.sock=sock;
205 pident.inNeeded=len;
825fa717 206 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 207 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 208
bb4bdbaf 209 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 210 if(!ret || ret==-1) { // timeout
bb4bdbaf 211 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 212 }
50c81227
BH
213 else if(data.empty()) {// error, EOF or other
214 return -1;
215 }
216
9170fbaf 217 return ret;
288f4aa9
BH
218}
219
5a38281c 220vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
046c5a5d 221const ComboAddress g_local4("0.0.0.0"), g_local6("::");
1652a63e 222
d7dae798 223//! pick a random query local address
1652a63e 224ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 225{
1652a63e 226 ComboAddress ret;
5a38281c 227 if(family==AF_INET) {
1652a63e
BH
228 if(g_localQueryAddresses4.empty())
229 ret = g_local4;
230 else
231 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
232 ret.sin4.sin_port = htons(port);
5a38281c
BH
233 }
234 else {
235 if(g_localQueryAddresses6.empty())
1652a63e
BH
236 ret = g_local6;
237 else
238 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
239
240 ret.sin6.sin6_port = htons(port);
5a38281c 241 }
1652a63e 242 return ret;
5a38281c 243}
4ef015cd 244
d8f6d49f 245void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 246
d7dae798
BH
247void setSocketBuffer(int fd, int optname, uint32_t size)
248{
249 uint32_t psize=0;
250 socklen_t len=sizeof(psize);
251
252 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
253 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
254 return;
255 }
256
257 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
258 L<<Logger::Error<<"Warning: unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
259}
260
261
262static void setSocketReceiveBuffer(int fd, uint32_t size)
263{
264 setSocketBuffer(fd, SO_RCVBUF, size);
265}
266
267static void setSocketSendBuffer(int fd, uint32_t size)
268{
269 setSocketBuffer(fd, SO_SNDBUF, size);
270}
271
272
4ef015cd
BH
273// you can ask this class for a UDP socket to send a query from
274// this socket is not yours, don't even think about deleting it
275// but after you call 'returnSocket' on it, don't assume anything anymore
276class UDPClientSocks
277{
4ef015cd 278 unsigned int d_numsocks;
4ef015cd 279public:
e2642526 280 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
281 {
282 }
283
996c89cc 284 typedef set<int> socks_t;
4ef015cd
BH
285 socks_t d_socks;
286
d8f6d49f
BH
287 // returning -1 means: temporary OS error (ie, out of files), -2 means OS error
288 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 289 {
d8f6d49f
BH
290 *fd=makeClientSocket(toaddr.sin4.sin_family);
291 if(*fd < 0) // temporary error - receive exception otherwise
292 return -1;
293
294 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
295 int err = errno;
41ff43f8
BH
296 // returnSocket(*fd);
297 Utility::closesocket(*fd);
d8f6d49f 298 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 299 return -2;
998a4334 300 return -1;
d8f6d49f 301 }
998a4334 302
d8f6d49f 303 d_socks.insert(*fd);
998a4334 304 d_numsocks++;
d8f6d49f 305 return 0;
4ef015cd
BH
306 }
307
095c3045
BH
308 void returnSocket(int fd)
309 {
310 socks_t::iterator i=d_socks.find(fd);
34801ab1 311 if(i==d_socks.end()) {
3f81d239 312 throw PDNSException("Trying to return a socket (fd="+lexical_cast<string>(fd)+") not in the pool");
34801ab1 313 }
bb4bdbaf 314 returnSocketLocked(i);
095c3045
BH
315 }
316
4ef015cd 317 // return a socket to the pool, or simply erase it
bb4bdbaf 318 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 319 {
600fc20b 320 if(i==d_socks.end()) {
3f81d239 321 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 322 }
80baf329 323 try {
bb4bdbaf 324 t_fdm->removeReadFD(*i);
80baf329
BH
325 }
326 catch(FDMultiplexerException& e) {
bb4bdbaf 327 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 328 }
c038218b 329 Utility::closesocket(*i);
998a4334
BH
330
331 d_socks.erase(i++);
332 --d_numsocks;
4ef015cd 333 }
d8f6d49f
BH
334
335 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 336 static int makeClientSocket(int family)
d8f6d49f
BH
337 {
338 int ret=(int)socket(family, SOCK_DGRAM, 0);
42c235e5 339
d8f6d49f
BH
340 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
341 return ret;
342
343 if(ret<0)
3f81d239 344 throw PDNSException("Making a socket for resolver (family = "+lexical_cast<string>(family)+"): "+stringerror());
36855b53
BH
345
346 Utility::setCloseOnExec(ret);
5a38281c 347
d8f6d49f
BH
348 int tries=10;
349 while(--tries) {
1652a63e
BH
350 uint16_t port;
351
d8f6d49f 352 if(tries==1) // fall back to kernel 'random'
4957a608 353 port = 0;
1652a63e
BH
354 else
355 port = 1025 + dns_random(64510);
356
357 ComboAddress sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 358
5a38281c 359 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 360 break;
d8f6d49f
BH
361 }
362 if(!tries)
3f81d239 363 throw PDNSException("Resolver binding to local query client socket: "+stringerror());
d8f6d49f
BH
364
365 Utility::setNonBlocking(ret);
366 return ret;
367 }
49a699c4
BH
368};
369
370static __thread UDPClientSocks* t_udpclientsocks;
4ef015cd 371
288f4aa9 372/* these two functions are used by LWRes */
34801ab1 373// -2 is OS error, -1 is error that depends on the remote, > 0 is success
787e5eab 374int asendto(const char *data, int len, int flags,
4957a608 375 const ComboAddress& toaddr, uint16_t id, const string& domain, uint16_t qtype, int* fd)
288f4aa9 376{
34801ab1
BH
377
378 PacketID pident;
787e5eab
BH
379 pident.domain = domain;
380 pident.remote = toaddr;
381 pident.type = qtype;
34801ab1
BH
382
383 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
384 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
385
386 for(; chain.first != chain.second; chain.first++) {
387 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 388 /*
4665c31e
BH
389 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
390 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 391 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 392 */
34801ab1
BH
393 chain.first->key.chain.insert(id); // we can chain
394 *fd=-1; // gets used in waitEvent / sendEvent later on
395 return 1;
396 }
397 }
398
49a699c4 399 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
400 if(ret < 0)
401 return ret;
34801ab1 402
998a4334
BH
403 pident.fd=*fd;
404 pident.id=id;
998a4334 405
bb4bdbaf
BH
406 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
407 ret = send(*fd, data, len, 0);
408
5b0ddd18 409 int tmp = errno;
bb4bdbaf 410
7302ed0a 411 if(ret < 0)
49a699c4 412 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 413
5b0ddd18 414 errno = tmp; // this is for logging purposes only
7302ed0a 415 return ret;
288f4aa9
BH
416}
417
9170fbaf 418// -1 is error, 0 is timeout, 1 is success
787e5eab 419int arecvfrom(char *data, int len, int flags, const ComboAddress& fromaddr, int *d_len,
4957a608 420 uint16_t id, const string& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 421{
0d5f0a9f
BH
422 static optional<unsigned int> nearMissLimit;
423 if(!nearMissLimit)
424 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
425
288f4aa9 426 PacketID pident;
4ef015cd 427 pident.fd=fd;
288f4aa9 428 pident.id=id;
0d5f0a9f 429 pident.domain=domain;
787e5eab 430 pident.type = qtype;
996c89cc 431 pident.remote=fromaddr;
b636533b 432
288f4aa9 433 string packet;
5b0ddd18 434 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 435
9170fbaf 436 if(ret > 0) {
996c89cc 437 if(packet.empty()) // means "error"
998a4334 438 return -1;
998a4334 439
705f31ae 440 *d_len=(int)packet.size();
9170fbaf 441 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 442 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 443 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 444 g_stats.spoofCount++;
35ce8576
BH
445 return -1;
446 }
288f4aa9 447 }
09e6702a 448 else {
34801ab1 449 if(fd >= 0)
49a699c4 450 t_udpclientsocks->returnSocket(fd);
09e6702a 451 }
9170fbaf 452 return ret;
288f4aa9
BH
453}
454
aa4e4cbf 455
87a5ea63 456string s_pidfname;
88def049
BH
457static void writePid(void)
458{
18e7758c 459 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 460 if(of)
705f31ae 461 of<< Utility::getpid() <<endl;
88def049 462 else
87a5ea63 463 L<<Logger::Error<<"Requested to write pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
464}
465
bd0289fc
BH
466typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
467tcpClientCounts_t __thread* t_tcpClientCounts;
0e9d9ce2 468
cd989c87
BH
469TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
470{
1bc9e6bd 471 ++s_currentConnections;
cd989c87 472 (*t_tcpClientCounts)[d_remote]++;
0e408828 473}
cd989c87
BH
474
475TCPConnection::~TCPConnection()
0e408828 476{
cd989c87
BH
477 if(Utility::closesocket(d_fd) < 0)
478 unixDie("closing socket for TCPConnection");
479 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
480 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 481 --s_currentConnections;
0e408828 482}
0e9d9ce2 483
1bc9e6bd 484AtomicCounter TCPConnection::s_currentConnections;
d8f6d49f 485void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 486
2cc13433
BH
487void updateRcodeStats(int res)
488{
489 switch(res) {
490 case RCode::ServFail:
491 g_stats.servFails++;
492 break;
493 case RCode::NXDomain:
494 g_stats.nxDomains++;
495 break;
496 case RCode::NoError:
497 g_stats.noErrors++;
498 break;
499 }
500}
501
79332bff
F
502ResponseStats g_rs;
503
288f4aa9
BH
504void startDoResolve(void *p)
505{
7b1469bb 506 DNSComboWriter* dc=(DNSComboWriter *)p;
0fb1501b 507 string loginfo="";
b23b8614 508
288f4aa9 509 try {
0fb1501b 510 loginfo=" (while setting loginfo)";
fab3ed34 511 loginfo=" ("+dc->d_mdp.d_qname+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
b18ace73 512 uint32_t maxanswersize= dc->d_tcp ? 65535 : min((uint16_t) 512, g_udpTruncationThreshold);
7f7b8d55 513 EDNSOpts edo;
56b4d21b 514 if(getEDNSOpts(dc->d_mdp, &edo) && !dc->d_tcp) {
b18ace73 515 maxanswersize = min(edo.d_packetsize, g_udpTruncationThreshold);
10321a98 516 }
84433b79 517 ComboAddress local;
518 listenSocketsAddresses_t::const_iterator lociter;
ea634573 519 vector<DNSResourceRecord> ret;
ea634573 520 vector<uint8_t> packet;
b23b8614
BH
521
522 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
523
524 pw.getHeader()->aa=0;
525 pw.getHeader()->ra=1;
c154c8a4 526 pw.getHeader()->qr=1;
bb4bdbaf 527 pw.getHeader()->tc=0;
ea634573 528 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 529 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
ea634573 530
904d3219
PD
531 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
532
533 SyncRes sr(dc->d_now);
534 bool tracedQuery=false; // we could consider letting Lua know about this too
535 bool variableAnswer = false;
536
56b4d21b
PD
537 int res;
538
e661a20b 539 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
540 pw.getHeader()->tc = 1;
541 res = 0;
542 variableAnswer = true;
e661a20b
PD
543 goto sendit;
544 }
545
77499b05
BH
546 if(t_traceRegex->get() && (*t_traceRegex)->match(dc->d_mdp.d_qname)) {
547 sr.setLogMode(SyncRes::Store);
548 tracedQuery=true;
549 }
550
551 if(!g_quiet || tracedQuery)
552 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
8a63d3ce 553 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
c75a6a9e 554
fededf47 555 sr.setId(MT->getTid());
67828389 556 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
557 sr.setCacheOnly();
558
84433b79 559 local.sin4.sin_family = dc->d_remote.sin4.sin_family;
560
561 lociter = g_listenSocketsAddresses.find(dc->d_socket);
562 if(lociter != g_listenSocketsAddresses.end()) {
563 local = lociter->second;
564 }
565 else {
566 socklen_t len = local.getSocklen();
567 getsockname(dc->d_socket, (sockaddr*)&local, &len); // if this fails, we're ok with it
568 }
569
570 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
571 if(!t_pdl->get() || !(*t_pdl)->preresolve(dc->d_remote, local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer)) {
44971ca0
PD
572 try {
573 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
574 }
575 catch(ImmediateServFailException &e) {
37aa9904 576 L<<Logger::Error<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
577
578 res = RCode::ServFail;
579 }
4485aa35 580
674cf0f6 581 if(t_pdl->get()) {
bd53ea9d 582 if(res == RCode::NoError) {
232f0877
CH
583 vector<DNSResourceRecord>::const_iterator i;
584 for(i=ret.begin(); i!=ret.end(); ++i)
585 if(i->qtype.getCode() == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
586 break;
587 if(i == ret.end())
84433b79 588 (*t_pdl)->nodata(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
232f0877
CH
589 }
590 else if(res == RCode::NXDomain)
84433b79 591 (*t_pdl)->nxdomain(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
bd53ea9d 592
84433b79 593 (*t_pdl)->postresolve(dc->d_remote,local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
d2322a5e 594 }
4485aa35 595 }
99c69ed3 596
25456896 597 if(res == RecursorBehaviour::DROP) {
e9c2ad3a 598 g_stats.policyDrops++;
ae7e77ad 599 delete dc;
600 dc=0;
601 return;
602 }
25456896 603 if(tracedQuery || res == RecursorBehaviour::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
85ffbc53 604 {
85ffbc53
PD
605 string trace(sr.getTrace());
606 if(!trace.empty()) {
607 vector<string> lines;
608 boost::split(lines, trace, boost::is_any_of("\n"));
609 BOOST_FOREACH(const string& line, lines) {
610 if(!line.empty())
611 L<<Logger::Warning<< line << endl;
612 }
613 }
614 }
79332bff 615
25456896 616 if(res == RecursorBehaviour::PASS) {
0fe1d080
PD
617 pw.getHeader()->rcode=RCode::ServFail;
618 // no commit here, because no record
619 g_stats.servFails++;
620 }
288f4aa9 621 else {
ea634573 622 pw.getHeader()->rcode=res;
2cc13433 623 updateRcodeStats(res);
77499b05 624
c154c8a4 625 if(ret.size()) {
92476c8b 626 orderAndShuffle(ret);
4957a608
BH
627 for(vector<DNSResourceRecord>::const_iterator i=ret.begin(); i!=ret.end(); ++i) {
628 pw.startRecord(i->qname, i->qtype.getCode(), i->ttl, i->qclass, (DNSPacketWriter::Place)i->d_place);
629 minTTL = min(minTTL, i->ttl);
630 if(i->qtype.getCode() == QType::A) { // blast out A record w/o doing whole dnswriter thing
631 uint32_t ip=0;
632 IpToU32(i->content, &ip);
633 pw.xfr32BitInt(htonl(ip));
634 } else {
635 shared_ptr<DNSRecordContent> drc(DNSRecordContent::mastermake(i->qtype.getCode(), i->qclass, i->content));
636 drc->toPacket(pw);
637 }
dffbaa08 638 if(pw.size() > maxanswersize) {
4957a608
BH
639 pw.rollback();
640 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
add935a2 641 {
4957a608 642 pw.getHeader()->tc=1;
add935a2
PD
643 pw.truncate();
644 }
4957a608
BH
645 goto sendit; // need to jump over pw.commit
646 }
647 }
b23b8614 648
18af64a8 649 pw.commit();
ea634573 650 }
288f4aa9 651 }
10321a98 652 sendit:;
79332bff 653 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
ea634573 654 if(!dc->d_tcp) {
c038218b 655 sendto(dc->d_socket, (const char*)&*packet.begin(), packet.size(), 0, (struct sockaddr *)(&dc->d_remote), dc->d_remote.getSocklen());
99c69ed3 656 if(!SyncRes::s_nopacketcache && !variableAnswer ) {
79332bff
F
657 t_packetCache->insertResponsePacket(string((const char*)&*packet.begin(), packet.size()),
658 g_now.tv_sec,
659 min(minTTL,
660 (pw.getHeader()->rcode == RCode::ServFail) ? SyncRes::s_packetcacheservfailttl : SyncRes::s_packetcachettl
661 )
662 );
1051f8a9 663 }
feccc9fc 664 }
9c495589
BH
665 else {
666 char buf[2];
ea634573
BH
667 buf[0]=packet.size()/256;
668 buf[1]=packet.size()%256;
feccc9fc 669
c038218b 670 Utility::iovec iov[2];
feccc9fc 671
ea634573
BH
672 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
673 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 674
c038218b 675 int ret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 676 bool hadError=true;
feccc9fc 677
0e9d9ce2 678 if(ret == 0)
18af64a8 679 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
0e9d9ce2 680 else if(ret < 0 )
18af64a8 681 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
ea634573 682 else if((unsigned int)ret != 2 + packet.size())
18af64a8 683 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
0e9d9ce2 684 else
18af64a8 685 hadError=false;
09e6702a
BH
686
687 // update tcp connection status, either by closing or moving to 'BYTE0'
18af64a8 688
09e6702a 689 if(hadError) {
18af64a8 690 // no need to remove us from FDM, we weren't there
c36bc97a 691 dc->d_socket = -1;
09e6702a 692 }
a6ae6414 693 else {
cd989c87 694 dc->d_tcpConnection->state=TCPConnection::BYTE0;
18af64a8 695 Utility::gettimeofday(&g_now, 0); // needs to be updated
cd989c87
BH
696 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
697 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
0e9d9ce2 698 }
9c495589 699 }
b23b8614 700
1d5b3ce6 701 if(!g_quiet) {
bb4bdbaf 702 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 703 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
18af64a8 704 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e 705 }
b23b8614 706
49a699c4 707 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
fe213470
BH
708 float spent=makeFloat(sr.d_now-dc->d_now);
709 if(spent < 0.001)
710 g_stats.answers0_1++;
711 else if(spent < 0.010)
712 g_stats.answers1_10++;
713 else if(spent < 0.1)
714 g_stats.answers10_100++;
715 else if(spent < 1.0)
716 g_stats.answers100_1000++;
717 else
718 g_stats.answersSlow++;
719
574af7ea 720 uint64_t newLat=(uint64_t)(spent*1000000);
08f3f638 721 newLat = min(newLat,(uint64_t)(g_networkTimeoutMsec*1000)); // outliers of several minutes exist..
722 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 723 // no worries, we do this for packet cache hits elsewhere
ea634573 724 delete dc;
c36bc97a 725 dc=0;
288f4aa9 726 }
3f81d239 727 catch(PDNSException &ae) {
0fb1501b 728 L<<Logger::Error<<"startDoResolve problem"<<loginfo<<": "<<ae.reason<<endl;
c36bc97a 729 delete dc;
288f4aa9 730 }
7b1469bb 731 catch(MOADNSException& e) {
0fb1501b 732 L<<Logger::Error<<"DNS parser error"<<loginfo<<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 733 delete dc;
7b1469bb 734 }
fdbf35ac 735 catch(std::exception& e) {
0fb1501b 736 L<<Logger::Error<<"STL error"<<loginfo<<": "<<e.what()<<endl;
c36bc97a 737 delete dc;
c154c8a4 738 }
288f4aa9 739 catch(...) {
0fb1501b 740 L<<Logger::Error<<"Any other exception in a resolver context"<<loginfo<<endl;
288f4aa9 741 }
ec6eacbc
BH
742
743 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
744}
745
677e2a46 746void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 747{
2d733c0f 748 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46
BH
749 if(processNum >= 0)
750 sockname += "."+lexical_cast<string>(processNum);
751 sockname+=".controlsocket";
41f7a068 752 s_rcc.listen(sockname);
387de317 753
387de317
BH
754 int sockowner = -1;
755 int sockgroup = -1;
756
757 if (!::arg().isEmpty("socket-group"))
758 sockgroup=::arg().asGid("socket-group");
759 if (!::arg().isEmpty("socket-owner"))
760 sockowner=::arg().asUid("socket-owner");
761
f838ad8d
BH
762 if (sockgroup > -1 || sockowner > -1) {
763 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
764 unixDie("Failed to chown control socket");
765 }
766 }
387de317
BH
767
768 // do mode change if socket-mode is given
769 if(!::arg().isEmpty("socket-mode")) {
770 mode_t sockmode=::arg().asMode("socket-mode");
771 chmod(sockname.c_str(), sockmode);
772 }
1d5b3ce6
BH
773}
774
d8f6d49f 775void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 776{
cd989c87 777 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 778
879b3f70 779 if(conn->state==TCPConnection::BYTE0) {
cd989c87 780 int bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 781 if(bytes==1)
667f7e60 782 conn->state=TCPConnection::BYTE1;
09e6702a 783 if(bytes==2) {
a0aa4f64 784 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
785 conn->bytesread=0;
786 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
787 }
788 if(!bytes || bytes < 0) {
bb4bdbaf 789 t_fdm->removeReadFD(fd);
09e6702a
BH
790 return;
791 }
792 }
667f7e60 793 else if(conn->state==TCPConnection::BYTE1) {
cd989c87 794 int bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 795 if(bytes==1) {
667f7e60 796 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 797 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 798 conn->bytesread=0;
09e6702a
BH
799 }
800 if(!bytes || bytes < 0) {
801 if(g_logCommonErrors)
cd989c87 802 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 803 t_fdm->removeReadFD(fd);
09e6702a
BH
804 return;
805 }
806 }
667f7e60 807 else if(conn->state==TCPConnection::GETQUESTION) {
cd989c87 808 int bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
09e6702a 809 if(!bytes || bytes < 0) {
cd989c87 810 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 811 t_fdm->removeReadFD(fd);
09e6702a
BH
812 return;
813 }
667f7e60
BH
814 conn->bytesread+=bytes;
815 if(conn->bytesread==conn->qlen) {
bb4bdbaf 816 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 817
09e6702a
BH
818 DNSComboWriter* dc=0;
819 try {
cd989c87 820 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
821 }
822 catch(MOADNSException &mde) {
4957a608
BH
823 g_stats.clientParseError++;
824 if(g_logCommonErrors)
cd989c87 825 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 826 return;
09e6702a 827 }
cd989c87
BH
828 dc->d_tcpConnection = conn; // carry the torch
829 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 830 dc->d_tcp=true;
cd989c87 831 dc->setRemote(&conn->d_remote);
879b3f70 832 if(dc->d_mdp.d_header.qr) {
4957a608
BH
833 delete dc;
834 L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
4957a608 835 return;
879b3f70 836 }
3abcdab2
PD
837 if(dc->d_mdp.d_header.opcode) {
838 delete dc;
839 L<<Logger::Error<<"Ignoring non-query opcode on server socket!"<<endl;
840 return;
841 }
09e6702a 842 else {
4957a608
BH
843 ++g_stats.qcounter;
844 ++g_stats.tcpqcounter;
50a5ef72 845 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 846 return;
09e6702a
BH
847 }
848 }
849 }
850}
851
6dcd28c3 852//! Handle new incoming TCP connection
d8f6d49f 853void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 854{
37d3f960 855 ComboAddress addr;
09e6702a 856 socklen_t addrlen=sizeof(addr);
705f31ae 857 int newsock=(int)accept(fd, (struct sockaddr*)&addr, &addrlen);
09e6702a 858 if(newsock>0) {
85c32340
BH
859 if(MT->numProcesses() > g_maxMThreads) {
860 g_stats.overCapacityDrops++;
861 Utility::closesocket(newsock);
862 return;
863 }
864
b3b5459d 865 t_remotes->addRemote(addr);
49a699c4 866 if(t_allowFrom && !t_allowFrom->match(&addr)) {
2914b022 867 if(!g_quiet)
4957a608 868 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 869
09e6702a 870 g_stats.unauthorizedTCP++;
705f31ae 871 Utility::closesocket(newsock);
09e6702a
BH
872 return;
873 }
bd0289fc 874 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 875 g_stats.tcpClientOverflow++;
705f31ae 876 Utility::closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
09e6702a
BH
877 return;
878 }
cd989c87 879
09e6702a 880 Utility::setNonBlocking(newsock);
cd989c87
BH
881 shared_ptr<TCPConnection> tc(new TCPConnection(newsock, addr));
882 tc->state=TCPConnection::BYTE0;
883
884 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 885
0bff046b 886 struct timeval now;
c038218b 887 Utility::gettimeofday(&now, 0);
cd989c87 888 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
889 }
890}
2914b022 891
1bc3c142
BH
892string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, int fd)
893{
894 ++g_stats.qcounter;
d7f10541
BH
895 if(fromaddr.sin4.sin_family==AF_INET6)
896 g_stats.ipv6qcounter++;
1bc3c142
BH
897
898 string response;
899 try {
900 uint32_t age;
901 if(!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(question, g_now.tv_sec, &response, &age)) {
902 if(!g_quiet)
232f0877 903 L<<Logger::Error<<t_id<< " question answered from packet cache from "<<fromaddr.toString()<<endl;
0a6b1027 904
1bc3c142
BH
905 g_stats.packetCacheHits++;
906 SyncRes::s_queries++;
907 ageDNSPacket(response, age);
908 sendto(fd, response.c_str(), response.length(), 0, (struct sockaddr*) &fromaddr, fromaddr.getSocklen());
97bee66d
BH
909 if(response.length() >= sizeof(struct dnsheader)) {
910 struct dnsheader dh;
911 memcpy(&dh, response.c_str(), sizeof(dh));
912 updateRcodeStats(dh.rcode);
913 }
08f3f638 914 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1bc3c142
BH
915 return 0;
916 }
917 }
918 catch(std::exception& e) {
919 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
920 return 0;
921 }
922
1bc3c142
BH
923 if(MT->numProcesses() > g_maxMThreads) {
924 g_stats.overCapacityDrops++;
925 return 0;
926 }
927
928 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
929 dc->setSocket(fd);
930 dc->setRemote(&fromaddr);
931
932 dc->d_tcp=false;
933 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
934 return 0;
935}
936
d8f6d49f 937void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 938{
a9af3782 939 int len;
5db529f8
BH
940 char data[1500];
941 ComboAddress fromaddr;
942 socklen_t addrlen=sizeof(fromaddr);
85c32340 943
a9af3782 944 if((len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
b3b5459d 945 t_remotes->addRemote(fromaddr);
b23b8614 946
49a699c4 947 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
2914b022 948 if(!g_quiet)
4957a608 949 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 950
5db529f8 951 g_stats.unauthorizedUDP++;
a9af3782 952 return;
5db529f8 953 }
15c01deb 954 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 955 if(!fromaddr.sin4.sin_port) { // also works for IPv6
956 if(!g_quiet)
957 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
958
959 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
960 return;
961 }
5db529f8 962 try {
b23b8614 963 dnsheader* dh=(dnsheader*)data;
5db529f8 964
b23b8614 965 if(dh->qr) {
4957a608
BH
966 if(g_logCommonErrors)
967 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 968 }
3abcdab2
PD
969 else if(dh->opcode) {
970 if(g_logCommonErrors)
971 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
972 }
5db529f8 973 else {
232f0877
CH
974 string question(data, len);
975 if(g_weDistributeQueries)
06ea9015 976 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, fd));
232f0877
CH
977 else
978 doProcessUDPQuestion(question, fromaddr, fd);
5db529f8
BH
979 }
980 }
981 catch(MOADNSException& mde) {
982 g_stats.clientParseError++;
84e66a59 983 if(g_logCommonErrors)
4957a608 984 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8
BH
985 }
986 }
ac0e821b
BH
987 else {
988 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
9326cae1
BH
989 if(errno == EAGAIN)
990 g_stats.noPacketError++;
ac0e821b 991 }
5db529f8
BH
992}
993
1bc3c142 994
5db529f8
BH
995typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
996deferredAdd_t deferredAdd;
997
f28307ad 998void makeTCPServerSockets()
9c495589 999{
37d3f960 1000 int fd;
f28307ad 1001 vector<string>locals;
2e3d8a19 1002 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1003
f28307ad 1004 if(locals.empty())
3f81d239 1005 throw PDNSException("No local address specified");
f28307ad 1006
f28307ad 1007 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1008 ServiceTuple st;
1009 st.port=::arg().asNum("local-port");
1010 parseService(*i, st);
1011
1012 ComboAddress sin;
1013
f28307ad 1014 memset((char *)&sin,0, sizeof(sin));
37d3f960 1015 sin.sin4.sin_family = AF_INET;
32252594 1016 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1017 sin.sin6.sin6_family = AF_INET6;
f71bc087 1018 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3f81d239 1019 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1020 }
1021
1022 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
42c235e5
PD
1023 Utility::setCloseOnExec(fd);
1024
37d3f960 1025 if(fd<0)
3f81d239 1026 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad
BH
1027
1028 int tmp=1;
37d3f960 1029 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
f28307ad 1030 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1031 exit(1);
f28307ad
BH
1032 }
1033
c8ddb7c2 1034#ifdef TCP_DEFER_ACCEPT
37d3f960
BH
1035 if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
1036 if(i==locals.begin())
4957a608 1037 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1038 }
1039#endif
1040
32252594 1041 sin.sin4.sin_port = htons(st.port);
37d3f960
BH
1042 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1043 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1044 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
f28307ad 1045
37d3f960 1046 Utility::setNonBlocking(fd);
49a699c4 1047 setSocketSendBuffer(fd, 65000);
37d3f960 1048 listen(fd, 128);
5db529f8 1049 deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1050 g_tcpListenSockets.push_back(fd);
84433b79 1051 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1052 // - fd is not that which we know here, but returned from accept()
aa136564 1053 if(sin.sin4.sin_family == AF_INET)
32252594 1054 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1055 else
32252594 1056 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1057 }
9c495589
BH
1058}
1059
f28307ad 1060void makeUDPServerSockets()
288f4aa9 1061{
f28307ad 1062 vector<string>locals;
2e3d8a19 1063 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1064
f28307ad 1065 if(locals.empty())
3f81d239 1066 throw PDNSException("No local address specified");
f28307ad 1067
2e3d8a19 1068 if(::arg()["local-address"]=="0.0.0.0") {
c836dc19 1069 L<<Logger::Warning<<"It is advised to bind to explicit addresses with the --local-address option"<<endl;
288f4aa9 1070 }
525b8a7c 1071
f28307ad 1072 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1073 ServiceTuple st;
1074 st.port=::arg().asNum("local-port");
1075 parseService(*i, st);
1076
37d3f960 1077 ComboAddress sin;
996c89cc 1078
37d3f960
BH
1079 memset(&sin, 0, sizeof(sin));
1080 sin.sin4.sin_family = AF_INET;
32252594 1081 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1082 sin.sin6.sin6_family = AF_INET6;
f71bc087 1083 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3f81d239 1084 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960
BH
1085 }
1086
bb4bdbaf 1087 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
42c235e5 1088 Utility::setCloseOnExec(fd);
bb4bdbaf 1089
d3b4137e 1090 if(fd < 0) {
3f81d239 1091 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1092 }
37d3f960 1093
4e9a20e6 1094 setSocketReceiveBuffer(fd, 250000);
32252594 1095 sin.sin4.sin_port = htons(st.port);
37d3f960
BH
1096
1097 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1098 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
3f81d239 1099 throw PDNSException("Resolver binding to server socket on port "+ lexical_cast<string>(st.port) +" for "+ st.host+": "+stringerror());
f28307ad
BH
1100
1101 Utility::setNonBlocking(fd);
c2136bf0 1102
0aaecd50 1103 deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 1104 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
aa136564 1105 if(sin.sin4.sin_family == AF_INET)
32252594 1106 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1107 else
32252594 1108 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1109 }
c836dc19 1110}
caa6eefa 1111
9c495589 1112
c836dc19
BH
1113void daemonize(void)
1114{
1115 if(fork())
1116 exit(0); // bye bye
1117
1118 setsid();
1119
27a5ead5
BH
1120 int i=open("/dev/null",O_RDWR); /* open stdin */
1121 if(i < 0)
1122 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1123 else {
1124 dup2(i,0); /* stdin */
1125 dup2(i,1); /* stderr */
1126 dup2(i,2); /* stderr */
1127 close(i);
1128 }
288f4aa9 1129}
caa6eefa 1130
uint64_t counter;

// set by usr1Handler, reset by doStats() once it has run
bool statsWanted;

//! Signal handler: only records that a statistics dump was requested
void usr1Handler(int /* signum */)
{
  statsWanted = true;
}
ae1b2e98 1138
9170fbaf
BH
1139void usr2Handler(int)
1140{
f1f34cc2 1141 g_quiet= !g_quiet;
1142 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
1143 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
1144}
1145
c75a6a9e
BH
1146void doStats(void)
1147{
16beeaa4
BH
1148 static time_t lastOutputTime;
1149 static uint64_t lastQueryCount;
d299d4f5 1150
1151 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
1152 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
16beeaa4 1153
d299d4f5 1154 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
3427fa8a
BH
1155 L<<Logger::Warning<<"stats: "<<g_stats.qcounter<<" questions, "<<
1156 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
1157 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
1158 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
1159
1160 L<<Logger::Warning<<"stats: throttle map: "
1161 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
1162 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
70c2c8b1
BH
1163 L<<Logger::Warning<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1164 L<<Logger::Warning<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 1165 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
3427fa8a
BH
1166 L<<Logger::Warning<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
1167 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 1168
16beeaa4
BH
1169 //L<<Logger::Warning<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
1170 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
1171
1172 L<<Logger::Warning<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
1173 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
1174
1175 time_t now = time(0);
1176 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
1177 L<<Logger::Warning<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
1178 }
1179 lastOutputTime = now;
1180 lastQueryCount = SyncRes::s_queries;
c75a6a9e 1181 }
7becf07f 1182 else if(statsWanted)
70c2c8b1 1183 L<<Logger::Warning<<"stats: no stats yet!"<<endl;
7becf07f 1184
c75a6a9e
BH
1185 statsWanted=false;
1186}
c836dc19 1187
29f0b1ce 1188static void houseKeeping(void *)
779828c4 1189try
c836dc19 1190{
d67620e4 1191 static __thread time_t last_stat, last_rootupdate, last_prune, last_secpoll;
8baca3fa 1192 static __thread int cleanCounter=0;
c9e9e5e0 1193 struct timeval now;
c038218b 1194 Utility::gettimeofday(&now, 0);
c9e9e5e0 1195
1a16adf0 1196 // clog<<"* "<<t_id<<" "<<(void*)&last_stat<<"\t"<<(unsigned int)last_stat<<endl;
ac0e821b 1197
c3828c03 1198 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
5e4a2466
BH
1199 DTime dt;
1200 dt.setTimeval(now);
49a699c4 1201 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
c3828c03 1202 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numThreads);
33988bfb 1203
1a16adf0 1204 pruneCollection(t_sstorage->negcache, ::arg().asNum("max-cache-entries") / (g_numThreads * 10), 200);
8baca3fa
BH
1205
1206 if(!((cleanCounter++)%40)) { // this is a full scan!
1207 time_t limit=now.tv_sec-300;
ac0e821b 1208 for(SyncRes::nsspeeds_t::iterator i = t_sstorage->nsSpeeds.begin() ; i!= t_sstorage->nsSpeeds.end(); )
8baca3fa 1209 if(i->second.stale(limit))
ac0e821b 1210 t_sstorage->nsSpeeds.erase(i++);
8baca3fa
BH
1211 else
1212 ++i;
1213 }
7393d6c0 1214// L<<Logger::Warning<<"Spent "<<dt.udiff()/1000<<" msec cleaning"<<endl;
ae1b2e98
BH
1215 last_prune=time(0);
1216 }
ac0e821b 1217
c038218b 1218 if(now.tv_sec - last_rootupdate > 7200) {
c9e9e5e0 1219 SyncRes sr(now);
2188dcc3 1220 sr.setDoEDNS0(true);
ea634573 1221 vector<DNSResourceRecord> ret;
c836dc19
BH
1222
1223 sr.setNoCache();
a9af3782 1224 int res=sr.beginResolve(".", QType(QType::NS), 1, ret);
c836dc19 1225 if(!res) {
70c2c8b1 1226 L<<Logger::Warning<<"Refreshed . records"<<endl;
c9e9e5e0 1227 last_rootupdate=now.tv_sec;
c836dc19
BH
1228 }
1229 else
1230 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1231 }
d67620e4 1232
1233 if(!t_id) {
1234 if(now.tv_sec - last_stat >= 1800) {
1235 doStats();
1236 last_stat=time(0);
1237 }
1238
c0a074d7 1239 if(now.tv_sec - last_secpoll >= 3600) {
d67620e4 1240 doSecPoll(&last_secpoll);
1241 }
1242 }
c836dc19 1243}
3f81d239 1244catch(PDNSException& ae)
779828c4 1245{
c0a074d7 1246 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
779828c4
BH
1247 throw;
1248}
d6d5dea7 1249
49a699c4
BH
1250void makeThreadPipes()
1251{
c3828c03 1252 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
1253 struct ThreadPipeSet tps;
1254 int fd[2];
1255 if(pipe(fd) < 0)
1256 unixDie("Creating pipe for inter-thread communications");
1257
1258 tps.readToThread = fd[0];
1259 tps.writeToThread = fd[1];
1260
1261 if(pipe(fd) < 0)
1262 unixDie("Creating pipe for inter-thread communications");
1263 tps.readFromThread = fd[0];
1264 tps.writeFromThread = fd[1];
1265
1266 g_pipes.push_back(tps);
1267 }
1268}
1269
00c9b8c1
BH
1270struct ThreadMSG
1271{
1272 pipefunc_t func;
1273 bool wantAnswer;
1274};
1275
49a699c4
BH
1276void broadcastFunction(const pipefunc_t& func, bool skipSelf)
1277{
49a699c4
BH
1278 unsigned int n = 0;
1279 BOOST_FOREACH(ThreadPipeSet& tps, g_pipes)
1280 {
1281 if(n++ == t_id) {
1282 if(!skipSelf)
1283 func(); // don't write to ourselves!
1284 continue;
1285 }
00c9b8c1
BH
1286
1287 ThreadMSG* tmsg = new ThreadMSG();
1288 tmsg->func = func;
1289 tmsg->wantAnswer = true;
1290 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
49a699c4
BH
1291 unixDie("write to thread pipe returned wrong size or error");
1292
1293 string* resp;
1294 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1295 unixDie("read from thread pipe returned wrong size or error");
1296
1297 if(resp) {
1298// cerr <<"got response: " << *resp << endl;
1299 delete resp;
1300 }
1301 }
1302}
06ea9015 1303
1304uint32_t g_disthashseed;
1305void distributeAsyncFunction(const std::string& question, const pipefunc_t& func)
00c9b8c1 1306{
06ea9015 1307 unsigned int hash = hashQuestion(question.c_str(), question.length(), g_disthashseed);
1308 unsigned int target = 1 + (hash % (g_pipes.size()-1));
1309
00c9b8c1
BH
1310 if(target == t_id) {
1311 func();
1312 return;
1313 }
1314 ThreadPipeSet& tps = g_pipes[target];
1315 ThreadMSG* tmsg = new ThreadMSG();
1316 tmsg->func = func;
1317 tmsg->wantAnswer = false;
1318
1319 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
06ea9015 1320 unixDie("write to thread pipe returned wrong size or error");
00c9b8c1 1321}
3427fa8a 1322
49a699c4
BH
1323void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
1324{
00c9b8c1
BH
1325 ThreadMSG* tmsg;
1326
1327 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
1328 unixDie("read from thread pipe returned wrong size or error");
1329 }
3427fa8a 1330
2f22827a 1331 void *resp=0;
1332 try {
1333 resp = tmsg->func();
1334 }
1335 catch(std::exception& e) {
1336 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
1337 }
1338 catch(PDNSException& e) {
1339 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
1340 }
00c9b8c1
BH
1341 if(tmsg->wantAnswer)
1342 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp))
1343 unixDie("write to thread pipe returned wrong size or error");
3427fa8a 1344
00c9b8c1 1345 delete tmsg;
49a699c4 1346}
09e6702a 1347
13034931
BH
1348template<class T> void *voider(const boost::function<T*()>& func)
1349{
1350 return func();
1351}
1352
b3b5459d
BH
1353vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
1354{
1355 a.insert(a.end(), b.begin(), b.end());
1356 return a;
1357}
1358
13034931 1359template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
1360{
1361 unsigned int n = 0;
1362 T ret=T();
1363 BOOST_FOREACH(ThreadPipeSet& tps, g_pipes)
1364 {
1365 if(n++ == t_id) {
1366 if(!skipSelf) {
1367 T* resp = (T*)func(); // don't write to ourselves!
1368 if(resp) {
1369 //~ cerr <<"got direct: " << *resp << endl;
1370 ret += *resp;
1371 delete resp;
1372 }
1373 }
1374 continue;
1375 }
1376
00c9b8c1
BH
1377 ThreadMSG* tmsg = new ThreadMSG();
1378 tmsg->func = boost::bind(voider<T>, func);
1379 tmsg->wantAnswer = true;
1380
1381 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg))
3427fa8a 1382 unixDie("write to thread pipe returned wrong size or error");
00c9b8c1 1383
3427fa8a
BH
1384
1385 T* resp;
1386 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1387 unixDie("read from thread pipe returned wrong size or error");
1388
1389 if(resp) {
1390 //~ cerr <<"got response: " << *resp << endl;
1391 ret += *resp;
1392 delete resp;
1393 }
1394 }
1395 return ret;
1396}
1397
13034931
BH
1398template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
1399template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
b3b5459d 1400template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 1401
d8f6d49f 1402void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
1403{
1404 string remote;
1405 string msg=s_rcc.recv(&remote);
1406 RecursorControlParser rcp;
1407 RecursorControlParser::func_t* command;
77499b05 1408
09e6702a 1409 string answer=rcp.getAnswer(msg, &command);
ab5c053d
BH
1410 try {
1411 s_rcc.send(answer, &remote);
1412 command();
1413 }
fdbf35ac 1414 catch(std::exception& e) {
ab5c053d
BH
1415 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
1416 }
3f81d239 1417 catch(PDNSException& ae) {
ab5c053d
BH
1418 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
1419 }
09e6702a
BH
1420}
1421
d8f6d49f 1422void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1423{
0b18b22e 1424 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 1425 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 1426
667f7e60 1427 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 1428
705f31ae 1429 int ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 1430 if(ret > 0) {
667f7e60
BH
1431 pident->inMSG.append(&buffer[0], &buffer[ret]);
1432 pident->inNeeded-=ret;
825fa717 1433 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
1434 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
1435 PacketID pid=*pident;
1436 string msg=pident->inMSG;
09e6702a 1437
bb4bdbaf 1438 t_fdm->removeReadFD(fd);
09e6702a
BH
1439 MT->sendEvent(pid, &msg);
1440 }
1441 else {
667f7e60 1442 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
1443 }
1444 }
1445 else {
667f7e60 1446 PacketID tmp=*pident;
bb4bdbaf 1447 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
1448 string empty;
1449 MT->sendEvent(tmp, &empty); // this conveys error status
1450 }
1451}
1452
d8f6d49f 1453void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1454{
0b18b22e 1455 PacketID* pid=any_cast<PacketID>(&var);
4ca15bca 1456 int ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 1457 if(ret > 0) {
667f7e60
BH
1458 pid->outPos+=ret;
1459 if(pid->outPos==pid->outMSG.size()) {
1460 PacketID tmp=*pid;
bb4bdbaf 1461 t_fdm->removeWriteFD(fd);
09e6702a
BH
1462 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
1463 }
1464 }
1465 else { // error or EOF
667f7e60 1466 PacketID tmp(*pid);
bb4bdbaf 1467 t_fdm->removeWriteFD(fd);
09e6702a 1468 string sent;
998a4334 1469 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
1470 }
1471}
1472
34801ab1
BH
1473// resend event to everybody chained onto it
1474void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
1475{
1476 if(iter->key.chain.empty())
1477 return;
e27e91a8 1478 // cerr<<"doResends called!\n";
34801ab1
BH
1479 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
1480 resend.fd=-1;
1481 resend.id=*i;
e27e91a8 1482 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 1483
34801ab1
BH
1484 MT->sendEvent(resend, &content);
1485 g_stats.chainResends++;
34801ab1
BH
1486 }
1487}
1488
d8f6d49f 1489void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1490{
600fc20b 1491 PacketID pid=any_cast<PacketID>(var);
998a4334 1492 int len;
09e6702a 1493 char data[1500];
996c89cc 1494 ComboAddress fromaddr;
09e6702a
BH
1495 socklen_t addrlen=sizeof(fromaddr);
1496
998a4334 1497 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 1498
998a4334
BH
1499 if(len < (int)sizeof(dnsheader)) {
1500 if(len < 0)
996c89cc 1501 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a
BH
1502 else {
1503 g_stats.serverParseError++;
1504 if(g_logCommonErrors)
85db02c5 1505 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 1506 ": packet smaller than DNS header"<<endl;
998a4334 1507 }
34801ab1 1508
49a699c4 1509 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
1510 string empty;
1511
1512 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
1513 if(iter != MT->d_waiters.end())
1514 doResends(iter, pid, empty);
1515
1516 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334
BH
1517 return;
1518 }
1519
1520 dnsheader dh;
1521 memcpy(&dh, data, sizeof(dh));
1522
6da3b3ad
PD
1523 PacketID pident;
1524 pident.remote=fromaddr;
1525 pident.id=dh.id;
1526 pident.fd=fd;
34801ab1 1527
33a928af 1528 if(!dh.qr && g_logCommonErrors) {
6da3b3ad
PD
1529 L<<Logger::Warning<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
1530 }
1531
1532 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
1533 !dh.qr) { // one weird server
1534 pident.domain.clear();
1535 pident.type = 0;
1536 }
1537 else {
1538 try {
1539 pident.domain=questionExpand(data, len, pident.type); // don't copy this from above - we need to do the actual read
1540 }
1541 catch(std::exception& e) {
1542 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
1543 L<<Logger::Warning<<"Error in packet from "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
1544 return;
34801ab1 1545 }
6da3b3ad
PD
1546 }
1547 string packet;
1548 packet.assign(data, len);
34801ab1 1549
6da3b3ad
PD
1550 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
1551 if(iter != MT->d_waiters.end()) {
1552 doResends(iter, pident, packet);
1553 }
c1da7976 1554
6da3b3ad 1555retryWithName:
4957a608 1556
6da3b3ad
PD
1557 if(!MT->sendEvent(pident, &packet)) {
1558 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
1559 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
1560 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
1561 pdns_iequals(pident.domain, mthread->key.domain)) {
1562 mthread->key.nearMisses++;
998a4334 1563 }
6da3b3ad
PD
1564
1565 // be a bit paranoid here since we're weakening our matching
1566 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
1567 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
1568 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
1569 pident.domain = mthread->key.domain;
1570 pident.type = mthread->key.type;
1571 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 1572 }
09e6702a 1573 }
6da3b3ad
PD
1574 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
1575 if(g_logCommonErrors) {
1576 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<<pident.domain<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 1577 }
09e6702a 1578 }
6da3b3ad
PD
1579 else if(fd >= 0) {
1580 t_udpclientsocks->returnSocket(fd);
1581 }
09e6702a
BH
1582}
1583
1f4abb20
BH
1584FDMultiplexer* getMultiplexer()
1585{
1586 FDMultiplexer* ret;
1587 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
1588 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
1589 try {
1590 ret=i->second();
1f4abb20
BH
1591 return ret;
1592 }
98d0ee4a 1593 catch(FDMultiplexerException &fe) {
0a7f24cb 1594 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
1595 }
1596 catch(...) {
1597 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
1598 }
1f4abb20
BH
1599 }
1600 L<<Logger::Error<<"No working multiplexer found!"<<endl;
1601 exit(1);
1602}
1603
5605c067 1604
0f39c1a3 1605string* doReloadLuaScript()
4485aa35 1606{
674cf0f6 1607 string fname= ::arg()["lua-dns-script"];
4485aa35 1608 try {
674cf0f6
BH
1609 if(fname.empty()) {
1610 t_pdl->reset();
1611 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 1612 return new string("unloaded\n");
4485aa35
BH
1613 }
1614 else {
5704e107 1615 *t_pdl = shared_ptr<RecursorLua>(new RecursorLua(fname));
4485aa35
BH
1616 }
1617 }
fdbf35ac 1618 catch(std::exception& e) {
674cf0f6 1619 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 1620 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 1621 }
674cf0f6
BH
1622
1623 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 1624 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
1625}
1626
49a699c4
BH
1627string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
1628{
1629 if(begin != end)
1630 ::arg().set("lua-dns-script") = *begin;
1631
0f39c1a3 1632 return broadcastAccFunction<string>(doReloadLuaScript);
49a699c4
BH
1633}
1634
77499b05
BH
1635string* pleaseUseNewTraceRegex(const std::string& newRegex)
1636try
1637{
1638 if(newRegex.empty()) {
1639 t_traceRegex->reset();
1640 return new string("unset\n");
1641 }
1642 else {
1643 (*t_traceRegex) = shared_ptr<Regex>(new Regex(newRegex));
1644 return new string("ok\n");
1645 }
1646}
3f81d239 1647catch(PDNSException& ae)
77499b05
BH
1648{
1649 return new string(ae.reason+"\n");
1650}
1651
1652string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
1653{
1654 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
1655}
1656
4e9a20e6 1657static void checkLinuxIPv6Limits()
1658{
1659#ifdef __linux__
1660 string line;
1661 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
1662 int lim=atoi(line.c_str());
1663 if(lim < 16384) {
36849ff2 1664 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 1665 }
1666 }
1667#endif
1668}
36849ff2 1669static void checkOrFixFDS()
4e9a20e6 1670{
36849ff2 1671 unsigned int availFDs=getFilenumLimit();
1672 if(g_maxMThreads * g_numThreads > availFDs) {
1673 if(getFilenumLimit(true) >= g_maxMThreads * g_numThreads) {
1674 setFilenumLimit(g_maxMThreads * g_numThreads);
1675 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<g_maxMThreads * g_numThreads<<" to match max-mthreads and threads settings"<<endl;
1676 }
1677 else {
1678 int newval = getFilenumLimit(true) / g_numThreads;
1679 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<availFDs<<" < "<<g_maxMThreads*g_numThreads<<"), reducing max-mthreads to "<<newval<<endl;
1680 g_maxMThreads = newval;
1681 setFilenumLimit(g_maxMThreads * g_numThreads);
1682 }
1683 }
1684
4e9a20e6 1685}
77499b05 1686
// Forward declaration: serviceMain() starts recursorThread before its definition appears.
void* recursorThread(void* ptr);
51e2144e 1688
3427fa8a 1689void* pleaseSupplantACLs(NetmaskGroup *ng)
49a699c4
BH
1690{
1691 t_allowFrom = ng;
3427fa8a 1692 return 0;
49a699c4
BH
1693}
1694
dbd23fc2
BH
// Copies of main()'s argc/argv; parseACLs() re-parses the command line from
// these when the ACL configuration is reloaded.
int g_argc;
char **g_argv;
1697
18af64a8 1698void parseACLs()
f7c1d4e3 1699{
18af64a8 1700 static bool l_initialized;
49a699c4
BH
1701
1702 if(l_initialized) { // only reload configuration file on second call
18af64a8
BH
1703 string configname=::arg()["config-dir"]+"/recursor.conf";
1704 cleanSlashes(configname);
1705
1706 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 1707 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 1708 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 1709 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
1710 ::arg().preParse(g_argc, g_argv, "include-dir");
1711
1712 // then process includes
1713 std::vector<std::string> extraConfigs;
242b90e1
AT
1714 ::arg().gatherIncludes(extraConfigs);
1715
829849d6 1716 BOOST_FOREACH(const std::string& fn, extraConfigs) {
7e818521 1717 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
1718 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
1719 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
1720 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 1721 }
ca2c884c
AT
1722
1723 ::arg().preParse(g_argc, g_argv, "allow-from-file");
1724 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 1725 }
49a699c4
BH
1726
1727 NetmaskGroup* oldAllowFrom = t_allowFrom, *allowFrom=new NetmaskGroup;
1728
2c95fc65
BH
1729 if(!::arg()["allow-from-file"].empty()) {
1730 string line;
2c95fc65
BH
1731 ifstream ifs(::arg()["allow-from-file"].c_str());
1732 if(!ifs) {
49a699c4 1733 delete allowFrom;
9c61b9d0 1734 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
1735 }
1736
1737 string::size_type pos;
1738 while(getline(ifs,line)) {
1739 pos=line.find('#');
1740 if(pos!=string::npos)
1741 line.resize(pos);
1742 trim(line);
1743 if(line.empty())
1744 continue;
1745
18af64a8 1746 allowFrom->addMask(line);
2c95fc65 1747 }
49a699c4 1748 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
1749 }
1750 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
1751 vector<string> ips;
1752 stringtok(ips, ::arg()["allow-from"], ", ");
c36bc97a 1753
f7c1d4e3
BH
1754 L<<Logger::Warning<<"Only allowing queries from: ";
1755 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 1756 allowFrom->addMask(*i);
f7c1d4e3 1757 if(i!=ips.begin())
674cf0f6 1758 L<<Logger::Warning<<", ";
f7c1d4e3
BH
1759 L<<Logger::Warning<<*i;
1760 }
1761 L<<Logger::Warning<<endl;
1762 }
49a699c4
BH
1763 else {
1764 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
1765 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
1766 delete allowFrom;
1767 allowFrom = 0;
1768 }
1769
1770 g_initialAllowFrom = allowFrom;
d7dae798 1771 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
49a699c4
BH
1772 delete oldAllowFrom;
1773
1774 l_initialized = true;
18af64a8
BH
1775}
1776
1777int serviceMain(int argc, char*argv[])
1778{
5124de27 1779 L.setName(s_programname);
18af64a8
BH
1780
1781 L.setLoglevel((Logger::Urgency)(6)); // info and up
1782
1783 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
1784 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
1785 if(val >= 0)
1786 theL().setFacility(val);
18af64a8
BH
1787 else
1788 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
1789 }
1790
ba1a571d 1791 showProductVersion();
18af64a8 1792 seedRandom(::arg()["entropy-source"]);
06ea9015 1793 g_disthashseed=dns_random(0xffffffff);
1794
18af64a8
BH
1795 parseACLs();
1796
eb5bae86
BH
1797 if(!::arg()["dont-query"].empty()) {
1798 g_dontQuery=new NetmaskGroup;
1799 vector<string> ips;
1800 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
1801 ips.push_back("0.0.0.0");
1802 ips.push_back("::");
c36bc97a 1803
eb5bae86
BH
1804 L<<Logger::Warning<<"Will not send queries to: ";
1805 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
1806 g_dontQuery->addMask(*i);
1807 if(i!=ips.begin())
4957a608 1808 L<<Logger::Warning<<", ";
eb5bae86
BH
1809 L<<Logger::Warning<<*i;
1810 }
1811 L<<Logger::Warning<<endl;
1812 }
1813
f7c1d4e3 1814 g_quiet=::arg().mustDo("quiet");
1bc3c142
BH
1815 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
1816 if(g_weDistributeQueries) {
1817 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1818 }
1819
77499b05
BH
1820 if(::arg()["trace"]=="fail") {
1821 SyncRes::setDefaultLogMode(SyncRes::Store);
1822 }
1823 else if(::arg().mustDo("trace")) {
1824 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
1825 ::arg().set("quiet")="no";
1826 g_quiet=false;
1827 }
f7c1d4e3 1828
aadceba8 1829 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
1830
4e9a20e6 1831 checkLinuxIPv6Limits();
5a38281c
BH
1832 try {
1833 vector<string> addrs;
1834 if(!::arg()["query-local-address6"].empty()) {
1835 SyncRes::s_doIPv6=true;
d4fb76e9 1836 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
5a38281c
BH
1837
1838 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
1839 BOOST_FOREACH(const string& addr, addrs) {
4957a608 1840 g_localQueryAddresses6.push_back(ComboAddress(addr));
5a38281c
BH
1841 }
1842 }
d4fb76e9
BH
1843 else {
1844 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
1845 }
5a38281c
BH
1846 addrs.clear();
1847 stringtok(addrs, ::arg()["query-local-address"], ", ;");
1848 BOOST_FOREACH(const string& addr, addrs) {
1849 g_localQueryAddresses4.push_back(ComboAddress(addr));
1850 }
1851 }
1852 catch(std::exception& e) {
1853 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
1854 exit(99);
f7c1d4e3 1855 }
f555e92e 1856
578361b3 1857 SyncRes::s_noEDNSPing = true; // ::arg().mustDo("disable-edns-ping");
4bfae16d 1858 SyncRes::s_noEDNS = ::arg().mustDo("disable-edns");
578361b3 1859 if(!SyncRes::s_noEDNS) {
1860 L<<Logger::Warning<<"Running in experimental EDNS mode - may cause problems"<<endl;
1861 }
bb4bdbaf 1862
1051f8a9
BH
1863 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
1864
f7c1d4e3 1865 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
c3e753c7 1866 SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
1051f8a9
BH
1867 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
1868 SyncRes::s_packetcacheservfailttl=::arg().asNum("packetcache-servfail-ttl");
628e2c7b
PA
1869 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
1870 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 1871 SyncRes::s_serverID=::arg()["server-id"];
173d790e 1872 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
f7c1d4e3
BH
1873 if(SyncRes::s_serverID.empty()) {
1874 char tmp[128];
1875 gethostname(tmp, sizeof(tmp)-1);
1876 SyncRes::s_serverID=tmp;
1877 }
1878
5b0ddd18 1879 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 1880
49a699c4 1881 g_initialDomainMap = parseAuthAndForwards();
674cf0f6 1882
08f3f638 1883 g_latencyStatSize=::arg().asNum("latency-statistic-size");
b3b5459d 1884
f7c1d4e3 1885 g_logCommonErrors=::arg().mustDo("log-common-errors");
e661a20b
PD
1886
1887 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
1888 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
1889
f7c1d4e3
BH
1890 makeUDPServerSockets();
1891 makeTCPServerSockets();
815099b2 1892
677e2a46
BH
1893 int forks;
1894 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
1895 if(!fork()) // we are child
1896 break;
1897 }
1898
2d733c0f 1899 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
815099b2
BH
1900 if(!s_pidfname.empty())
1901 unlink(s_pidfname.c_str()); // remove possible old pid file
f7c1d4e3 1902
f7c1d4e3
BH
1903 if(::arg().mustDo("daemon")) {
1904 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
1905 L.toConsole(Logger::Critical);
f7c1d4e3
BH
1906 daemonize();
1907 }
1908 signal(SIGUSR1,usr1Handler);
1909 signal(SIGUSR2,usr2Handler);
1910 signal(SIGPIPE,SIG_IGN);
1911 writePid();
677e2a46 1912 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
138435cb
BH
1913
1914 int newgid=0;
1915 if(!::arg()["setgid"].empty())
1916 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
1917 int newuid=0;
1918 if(!::arg()["setuid"].empty())
1919 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
1920
f1d6a7ce
KM
1921 Utility::dropGroupPrivs(newuid, newgid);
1922
138435cb
BH
1923 if (!::arg()["chroot"].empty()) {
1924 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
1925 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
1926 exit(1);
1927 }
1928 }
1929
f1d6a7ce 1930 Utility::dropUserPrivs(newuid);
77c535ae 1931 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
343257a4 1932
49a699c4 1933 makeThreadPipes();
5d4dd7fe
BH
1934
1935 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
1936 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3a8a4d68 1937 g_maxMThreads=::arg().asNum("max-mthreads");
36849ff2 1938 checkOrFixFDS();
343257a4 1939
c3828c03 1940 if(g_numThreads == 1) {
76698c6e 1941 L<<Logger::Warning<<"Operating unthreaded"<<endl;
76698c6e
BH
1942 recursorThread(0);
1943 }
1944 else {
1945 pthread_t tid;
c3828c03
BH
1946 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
1947 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 1948 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
76698c6e
BH
1949 }
1950 void* res;
49a699c4
BH
1951
1952
76698c6e 1953 pthread_join(tid, &res);
bb4bdbaf 1954 }
bb4bdbaf
BH
1955 return 0;
1956}
1957
1958void* recursorThread(void* ptr)
1959try
1960{
2e2cd8ec 1961 t_id=(int) (long) ptr;
49a699c4 1962 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
ac0e821b 1963 t_sstorage->domainmap = g_initialDomainMap;
49a699c4
BH
1964 t_allowFrom = g_initialAllowFrom;
1965 t_udpclientsocks = new UDPClientSocks();
bd0289fc 1966 t_tcpClientCounts = new tcpClientCounts_t();
49a699c4 1967 primeHints();
674cf0f6 1968
49a699c4
BH
1969 t_packetCache = new RecursorPacketCache();
1970
1971 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
1972
5704e107 1973 t_pdl = new shared_ptr<RecursorLua>();
49a699c4 1974
674cf0f6
BH
1975 try {
1976 if(!::arg()["lua-dns-script"].empty()) {
5704e107 1977 *t_pdl = shared_ptr<RecursorLua>(new RecursorLua(::arg()["lua-dns-script"]));
674cf0f6
BH
1978 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
1979 }
1980
1981 }
1982 catch(std::exception &e) {
1983 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
1984 exit(99);
1985 }
1986
77499b05
BH
1987 t_traceRegex = new shared_ptr<Regex>();
1988
1989
b3b5459d
BH
1990 t_remotes = new RemoteKeeper();
1991 t_remotes->remotes.resize(::arg().asNum("remotes-ringbuffer-entries") / g_numThreads);
1992
1993 if(!t_remotes->remotes.empty())
1994 memset(&t_remotes->remotes[0], 0, t_remotes->remotes.size() * sizeof(RemoteKeeper::remotes_t::value_type));
1995
1996
bb4bdbaf
BH
1997 MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
1998
bb4bdbaf
BH
1999 PacketID pident;
2000
2001 t_fdm=getMultiplexer();
f3d1d67b 2002 if(!t_id) {
30a1aa92 2003 if(::arg().mustDo("experimental-webserver")) {
2004 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 2005 try {
1ce57618 2006 new RecursorWebServer(t_fdm);
8989097d
CH
2007 }
2008 catch(PDNSException &e) {
2009 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
2010 exit(99);
2011 }
f3d1d67b 2012 }
83252304 2013 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 2014 }
83252304 2015
49a699c4 2016 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 2017
1bc3c142
BH
2018 if(!g_weDistributeQueries || !t_id) // if we distribute queries, only t_id = 0 listens
2019 for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i)
2020 t_fdm->addReadFD(i->first, i->second);
f7c1d4e3 2021
674cf0f6 2022 if(!t_id) {
674cf0f6
BH
2023 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
2024 }
1bc3c142 2025
f7c1d4e3 2026 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
f7c1d4e3 2027
f7c1d4e3 2028 bool listenOnTCP(true);
49a699c4 2029
2c78bd57 2030 time_t last_carbon=0;
2031 time_t carbonInterval=::arg().asNum("carbon-interval");
3427fa8a 2032 counter=0; // used to periodically execute certain tasks
f7c1d4e3 2033 for(;;) {
ac0e821b 2034 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
f7c1d4e3 2035
3427fa8a
BH
2036 if(!(counter%500)) {
2037 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
2038 }
2039
d2392145 2040 if(!(counter%55)) {
d8f6d49f 2041 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 2042 expired_t expired=t_fdm->getTimeouts(g_now);
4957a608 2043
f7c1d4e3 2044 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 2045 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 2046 if(g_logCommonErrors)
cd989c87 2047 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 2048 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
2049 }
2050 }
2051
2052 counter++;
2053
3427fa8a 2054 if(!t_id && statsWanted) {
f7c1d4e3
BH
2055 doStats();
2056 }
2057
2058 Utility::gettimeofday(&g_now, 0);
2c78bd57 2059
2060 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
2061 MT->makeThread(doCarbonDump, 0);
2062 last_carbon = g_now.tv_sec;
2063 }
2064
bb4bdbaf 2065 t_fdm->run(&g_now);
3ea54bf0 2066 // 'run' updates g_now for us
f7c1d4e3 2067
b8ef5c5c 2068 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 2069 if(listenOnTCP) {
2070 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
2071 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2072 t_fdm->removeReadFD(*i);
2073 listenOnTCP=false;
2074 }
f7c1d4e3 2075 }
5c889cf5 2076 else {
2077 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
2078 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2079 t_fdm->addReadFD(*i, handleNewTCPQuestion);
2080 listenOnTCP=true;
2081 }
f7c1d4e3
BH
2082 }
2083 }
2084 }
2085}
3f81d239 2086catch(PDNSException &ae) {
bb4bdbaf
BH
2087 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2088 return 0;
2089}
2090catch(std::exception &e) {
2091 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2092 return 0;
2093}
2094catch(...) {
2095 L<<Logger::Error<<"any other exception in main: "<<endl;
2096 return 0;
2097}
2098
51e2144e 2099
288f4aa9
BH
2100int main(int argc, char **argv)
2101{
dbd23fc2
BH
2102 g_argc = argc;
2103 g_argv = argv;
5e3de507 2104 g_stats.startupTime=time(0);
3e135495 2105 versionSetProduct(ProductRecursor);
8a63d3ce 2106 reportBasicTypes();
0007c2e5 2107 reportOtherTypes();
ea634573 2108
22030c37 2109 int ret = EXIT_SUCCESS;
caa6eefa 2110
288f4aa9 2111 try {
f888311c 2112 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 2113 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 2114 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 2115 ::arg().set("local-port","port to listen on")="53";
32252594 2116 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
77499b05 2117 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
2e3d8a19 2118 ::arg().set("daemon","Operate as a daemon")="yes";
34162f8f 2119 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="4";
0e9d9ce2 2120 ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2e3d8a19
BH
2121 ::arg().set("chroot","switch to chroot jail")="";
2122 ::arg().set("setgid","If set, change group id to this gid for more security")="";
2123 ::arg().set("setuid","If set, change user id to this uid for more security")="";
5b0ddd18 2124 ::arg().set("network-timeout", "Wait this nummer of milliseconds for network i/o")="1500";
bb4bdbaf 2125 ::arg().set("threads", "Launch this number of threads")="2";
1bc3c142 2126 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1";
5124de27 2127 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
9097239c 2128 ::arg().set( "experimental-logfile", "Filename of the log file for JSON parser" )= "/var/log/pdns.log";
88d77d73
CH
2129 ::arg().setSwitch("experimental-webserver", "Start a webserver for monitoring") = "no";
2130 ::arg().set("experimental-webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
2131 ::arg().set("experimental-webserver-port", "Port of webserver to listen on") = "8082";
2132 ::arg().set("experimental-webserver-password", "Password required for accessing the webserver") = "";
69e7f117 2133 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
c348c0c8 2134 ::arg().set("experimental-api-config-dir", "Directory where REST API stores config and zones") = "";
bbef8f04 2135 ::arg().set("experimental-api-key", "REST API Static authentication key (required for API use)") = "";
cc08b5a9 2136 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
2c78bd57 2137 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server")="";
2138 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
b4ae7322 2139 ::arg().set("experimental-api-readonly", "If the JSON API should disallow data modification") = "no";
c038218b 2140 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 2141 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 2142 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
2143 ::arg().set("socket-owner","Owner of socket")="";
2144 ::arg().set("socket-group","Group of socket")="";
2145 ::arg().set("socket-mode", "Permissions for socket")="";
fdbf35ac 2146
2e3d8a19
BH
2147 ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
2148 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
2149 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 2150 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 2151 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 2152 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 2153 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 2154 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 2155 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 2156 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 2157 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 2158 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 2159 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 2160 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 2161 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 2162 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
7f7b8d55 2163 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
a9af3782 2164 ::arg().set("remotes-ringbuffer-entries", "maximum number of packets to store statistics for")="0";
ba1a571d 2165 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 2166 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 2167 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 2168 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
12cd44ee 2169 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 2170 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
0d5f0a9f 2171 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 2172 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067
BH
2173 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
2174 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
2175 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
2176 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 2177 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 2178 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 2179 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
9bc8c14c 2180 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
4485aa35 2181 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 2182 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
578361b3 2183// ::arg().setSwitch( "disable-edns-ping", "Disable EDNSPing - EXPERIMENTAL, LEAVE DISABLED" )= "no";
2184 ::arg().setSwitch( "disable-edns", "Disable EDNS - EXPERIMENTAL, LEAVE DISABLED" )= "";
1bc3c142 2185 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
966d3ba8 2186 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="";
e661a20b 2187 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
a09a8ce0 2188 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
aadceba8 2189 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 2190 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
a09a8ce0 2191
68e6df3c 2192 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 2193 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2e3d8a19
BH
2194
2195 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 2196 ::arg().setCmd("version","Print version string");
d5141417 2197 ::arg().setCmd("config","Output blank configuration");
f27e6356 2198 L.toConsole(Logger::Info);
2e3d8a19 2199 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 2200
2d733c0f
CH
2201 string configname=::arg()["config-dir"]+"/recursor.conf";
2202 if(::arg()["config-name"]!="") {
2203 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 2204 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
2205 }
2206 cleanSlashes(configname);
5124de27 2207
577cf284
BH
2208 if(::arg().mustDo("config")) {
2209 cout<<::arg().configstring()<<endl;
2210 exit(0);
2211 }
2212
2e3d8a19 2213 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
2214 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
2215
2e3d8a19 2216 ::arg().parse(argc,argv);
c836dc19 2217
2e3d8a19 2218 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 2219
61d74169 2220 if(::arg().asNum("threads")==1)
2221 ::arg().set("pdns-distributes-queries")="no";
2222
2e3d8a19 2223 if(::arg().mustDo("help")) {
ff5ba4f9
WA
2224 cout<<"syntax:"<<endl<<endl;
2225 cout<<::arg().helpstring(::arg()["help"])<<endl;
2226 exit(0);
b636533b 2227 }
5e3de507 2228 if(::arg().mustDo("version")) {
ba1a571d 2229 showProductVersion();
3613a51c 2230 showBuildConfiguration();
5e3de507
BH
2231 exit(99);
2232 }
b636533b 2233
34162f8f
CH
2234 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
2235 if (logUrgency < Logger::Error)
2236 logUrgency = Logger::Error;
2237 L.setLoglevel(logUrgency);
2238 L.toConsole(logUrgency);
2239
f7c1d4e3 2240 serviceMain(argc, argv);
288f4aa9 2241 }
3f81d239 2242 catch(PDNSException &ae) {
c836dc19 2243 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 2244 ret=EXIT_FAILURE;
288f4aa9 2245 }
fdbf35ac 2246 catch(std::exception &e) {
c836dc19 2247 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 2248 ret=EXIT_FAILURE;
288f4aa9
BH
2249 }
2250 catch(...) {
c836dc19 2251 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 2252 ret=EXIT_FAILURE;
288f4aa9 2253 }
caa6eefa 2254
22030c37 2255 return ret;
288f4aa9 2256}