]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
implement Lua gettag() which optionally tells you which part of the packet cache...
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
32cb6fd4 3 Copyright (C) 2003 - 2016 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
3ddb9247 6 it under the terms of the GNU General Public License version 2
f28307ad 7 as published by the Free Software Foundation
288f4aa9 8
f782fe38
MH
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
288f4aa9
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
288f4aa9 21*/
caa6eefa 22
870a0fe4
AT
23#ifdef HAVE_CONFIG_H
24#include "config.h"
25#endif
3e61e7f7 26
76473b92
KM
27#include <netdb.h>
28#include <sys/stat.h>
29#include <unistd.h>
fa8fd4d2 30
2470b36e 31#include "ws-recursor.hh"
49a699c4 32#include <pthread.h>
3ea54bf0 33#include "recpacketcache.hh"
3ddb9247 34#include "utility.hh"
51e2144e 35#include "dns_random.hh"
288f4aa9
BH
36#include <iostream>
37#include <errno.h>
81859ba5 38#include <boost/static_assert.hpp>
288f4aa9
BH
39#include <map>
40#include <set>
97bb160b 41#include "recursor_cache.hh"
38c9ceaa 42#include "cachecleaner.hh"
288f4aa9 43#include <stdio.h>
c75a6a9e 44#include <signal.h>
288f4aa9 45#include <stdlib.h>
bb4bdbaf 46#include "misc.hh"
288f4aa9
BH
47#include "mtasker.hh"
48#include <utility>
288f4aa9
BH
49#include "arguments.hh"
50#include "syncres.hh"
88def049
BH
51#include <fcntl.h>
52#include <fstream>
3e61e7f7 53#include "sortlist.hh"
54extern SortList g_sortlist;
5c633640
BH
55#include "sstuff.hh"
56#include <boost/tuple/tuple.hpp>
57#include <boost/tuple/tuple_comparison.hpp>
72df400f 58#include <boost/shared_array.hpp>
7f1fa77d 59#include <boost/function.hpp>
5605c067 60#include <boost/algorithm/string.hpp>
8f7473d7 61#ifdef MALLOC_TRACE
62#include "malloctrace.hh"
63#endif
40a3dd64 64#include <netinet/tcp.h>
ea634573
BH
65#include "dnsparser.hh"
66#include "dnswriter.hh"
67#include "dnsrecords.hh"
f814d7c8 68#include "zoneparser-tng.hh"
1d5b3ce6 69#include "rec_channel.hh"
aaacf7f2 70#include "logger.hh"
c8ddb7c2 71#include "iputils.hh"
09e6702a 72#include "mplexer.hh"
c038218b 73#include "config.h"
808c5ef7 74#include "lua-recursor4.hh"
ba1a571d 75#include "version.hh"
79332bff 76#include "responsestats.hh"
d67620e4 77#include "secpoll-recursor.hh"
c5c066bf 78#include "dnsname.hh"
644dd1da 79#include "filterpo.hh"
80#include "rpzloader.hh"
b3f0ed10 81#include "validate-recursor.hh"
f3c18728 82#include "rec-lua-conf.hh"
83
a2bfc3ff
BH
84#ifndef RECURSOR
85#include "statbag.hh"
86StatBag S;
87#endif
f3c18728 88
bb4bdbaf 89__thread FDMultiplexer* t_fdm;
674cf0f6 90__thread unsigned int t_id;
09e6702a 91unsigned int g_maxTCPPerClient;
5b0ddd18 92unsigned int g_networkTimeoutMsec;
08f3f638 93uint64_t g_latencyStatSize;
09e6702a 94bool g_logCommonErrors;
e661a20b 95bool g_anyToTcp;
b33c2462 96uint16_t g_udpTruncationThreshold, g_outgoingEDNSBufsize;
a3e7b735 97__thread shared_ptr<RecursorLua4>* t_pdl;
60c8afa8 98
99__thread addrringbuf_t* t_remotes, *t_servfailremotes, *t_largeanswerremotes;
100
c5c066bf 101__thread boost::circular_buffer<pair<DNSName, uint16_t> >* t_queryring, *t_servfailqueryring;
77499b05 102__thread shared_ptr<Regex>* t_traceRegex;
674cf0f6 103
376effcf 104NetmaskGroup g_ednssubnets;
105SuffixMatchNode g_ednsdomains;
106
d7dae798
BH
107RecursorControlChannel s_rcc; // only active in thread 0
108
109// for communicating with our threads
49a699c4
BH
// File descriptors used for communicating with one of our threads.
// NOTE(review): judging by the names, these are the write/read ends of one
// pipe towards the thread and one pipe back from it -- confirm at the site
// that fills g_pipes.
struct ThreadPipeSet
{
  int writeToThread;    // descriptor written to when sending to the thread
  int readToThread;     // descriptor the "to thread" data is read from
  int writeFromThread;  // descriptor written to when sending from the thread
  int readFromThread;   // descriptor the "from thread" data is read from
};
3ea54bf0 117
d7dae798 118vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
5c633640 119
d7dae798 120SyncRes::domainmap_t* g_initialDomainMap; // new threads needs this to be setup
49a699c4
BH
121
122#include "namespaces.hh"
3ea54bf0 123
49a699c4 124__thread MemRecursorCache* t_RC;
16beeaa4 125__thread RecursorPacketCache* t_packetCache;
1d5b3ce6
BH
126RecursorStats g_stats;
127bool g_quiet;
49a699c4 128
1bc3c142
BH
129bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
130
41942bb3 131__thread NetmaskGroup* t_allowFrom;
49a699c4
BH
132static NetmaskGroup* g_initialAllowFrom; // new thread needs to be setup with this
133
eb5bae86 134NetmaskGroup* g_dontQuery;
2d733c0f 135string s_programname="pdns_recursor";
49a699c4 136
40a3dd64
BH
137typedef vector<int> tcpListenSockets_t;
138tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
3159c9ef 139int g_tcpTimeout;
85c32340 140unsigned int g_maxMThreads;
183eb877 141__thread struct timeval g_now; // timestamp, updated (too) frequently
84433b79 142typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
143listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
cbc03320 144set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
18af64a8 145
d7dae798
BH
146__thread MT_t* MT; // the big MTasker
147
f8f243b0 148unsigned int g_numThreads, g_numWorkerThreads;
c3828c03 149
12cd44ee 150#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
151// Bad Nets taken from both:
3ddb9247 152// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 153// and
154// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
155// where such a network may not be considered a valid destination
156#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
157#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 158
d7dae798 159//! used to send information to a newborn mthread
ea634573 160struct DNSComboWriter {
3ddb9247 161 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now),
232f0877 162 d_tcp(false), d_socket(-1)
ea634573
BH
163 {}
164 MOADNSParser d_mdp;
00c9b8c1 165 void setRemote(const ComboAddress* sa)
ea634573 166 {
37d3f960 167 d_remote=*sa;
ea634573
BH
168 }
169
b71b60ee 170 void setLocal(const ComboAddress& sa)
171 {
172 d_local=sa;
173 }
174
175
ea634573
BH
176 void setSocket(int sock)
177 {
178 d_socket=sock;
179 }
a1754c6a
BH
180
181 string getRemote() const
182 {
37d3f960 183 return d_remote.toString();
a1754c6a
BH
184 }
185
c9e9e5e0 186 struct timeval d_now;
b71b60ee 187 ComboAddress d_remote, d_local;
ea634573
BH
188 bool d_tcp;
189 int d_socket;
cd989c87 190 shared_ptr<TCPConnection> d_tcpConnection;
ea634573
BH
191};
192
193
288f4aa9
BH
194ArgvMap &arg()
195{
196 static ArgvMap theArg;
197 return theArg;
198}
4ef015cd 199
09e6702a 200
d8f6d49f 201void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 202
50c81227 203// -1 is error, 0 is timeout, 1 is success
3ddb9247 204int asendtcp(const string& data, Socket* sock)
5c633640
BH
205{
206 PacketID pident;
207 pident.sock=sock;
208 pident.outMSG=data;
3ddb9247 209
bb4bdbaf 210 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 211 string packet;
5c633640 212
5b0ddd18 213 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 214
9170fbaf 215 if(!ret || ret==-1) { // timeout
bb4bdbaf 216 t_fdm->removeWriteFD(sock->getHandle());
5c633640 217 }
50c81227
BH
218 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
219 return -1;
220 }
9170fbaf 221 return ret;
5c633640
BH
222}
223
d8f6d49f 224void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 225
9170fbaf 226// -1 is error, 0 is timeout, 1 is success
825fa717 227int arecvtcp(string& data, int len, Socket* sock, bool incompleteOkay)
288f4aa9 228{
50c81227 229 data.clear();
5c633640
BH
230 PacketID pident;
231 pident.sock=sock;
232 pident.inNeeded=len;
825fa717 233 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 234 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 235
bb4bdbaf 236 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 237 if(!ret || ret==-1) { // timeout
bb4bdbaf 238 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 239 }
50c81227
BH
240 else if(data.empty()) {// error, EOF or other
241 return -1;
242 }
243
9170fbaf 244 return ret;
288f4aa9
BH
245}
246
fba1e944 247void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 248{
fba1e944 249 PacketID pident=*any_cast<PacketID>(&var);
4465e941 250 char resp[512];
251 int ret=recv(fd, resp, sizeof(resp), 0);
252 t_fdm->removeReadFD(fd);
253 if(ret >= 0) {
254 string data(resp, ret);
fba1e944 255 MT->sendEvent(pident, &data);
4465e941 256 }
257 else {
fba1e944 258 string empty;
259 MT->sendEvent(pident, &empty);
260 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 261 }
262}
fba1e944 263string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 264{
4465e941 265 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
266 s.setNonBlocking();
267 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
268
269 s.bind(local);
270 s.connect(dest);
4465e941 271 s.send(query);
272
273 PacketID pident;
274 pident.sock=&s;
275 pident.type=0;
fba1e944 276 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 277
278 string data;
fba1e944 279
4465e941 280 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 281
4465e941 282 if(!ret || ret==-1) { // timeout
4465e941 283 t_fdm->removeReadFD(s.getHandle());
284 }
285 else if(data.empty()) {// error, EOF or other
fba1e944 286 // we could special case this
4465e941 287 return data;
288 }
4465e941 289 return data;
290}
291
292
3ddb9247 293vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
046c5a5d 294const ComboAddress g_local4("0.0.0.0"), g_local6("::");
1652a63e 295
d7dae798 296//! pick a random query local address
1652a63e 297ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 298{
1652a63e 299 ComboAddress ret;
5a38281c 300 if(family==AF_INET) {
3ddb9247 301 if(g_localQueryAddresses4.empty())
1652a63e 302 ret = g_local4;
3ddb9247 303 else
1652a63e
BH
304 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
305 ret.sin4.sin_port = htons(port);
5a38281c
BH
306 }
307 else {
308 if(g_localQueryAddresses6.empty())
1652a63e
BH
309 ret = g_local6;
310 else
311 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 312
1652a63e 313 ret.sin6.sin6_port = htons(port);
5a38281c 314 }
1652a63e 315 return ret;
5a38281c 316}
4ef015cd 317
d8f6d49f 318void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 319
d7dae798
BH
320void setSocketBuffer(int fd, int optname, uint32_t size)
321{
322 uint32_t psize=0;
323 socklen_t len=sizeof(psize);
3ddb9247 324
d7dae798
BH
325 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
326 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 327 return;
d7dae798
BH
328 }
329
330 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
c057bfaa 331 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
332}
333
334
335static void setSocketReceiveBuffer(int fd, uint32_t size)
336{
337 setSocketBuffer(fd, SO_RCVBUF, size);
338}
339
340static void setSocketSendBuffer(int fd, uint32_t size)
341{
342 setSocketBuffer(fd, SO_SNDBUF, size);
343}
344
345
4ef015cd
BH
346// you can ask this class for a UDP socket to send a query from
347// this socket is not yours, don't even think about deleting it
348// but after you call 'returnSocket' on it, don't assume anything anymore
349class UDPClientSocks
350{
4ef015cd 351 unsigned int d_numsocks;
4ef015cd 352public:
e2642526 353 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
354 {
355 }
356
996c89cc 357 typedef set<int> socks_t;
4ef015cd
BH
358 socks_t d_socks;
359
2ee280cf 360 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 361 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 362 {
d8f6d49f
BH
363 *fd=makeClientSocket(toaddr.sin4.sin_family);
364 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 365 return -2;
d8f6d49f
BH
366
367 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
368 int err = errno;
41ff43f8 369 // returnSocket(*fd);
3897b9e1 370 closesocket(*fd);
d8f6d49f 371 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 372 return -2;
998a4334 373 return -1;
d8f6d49f 374 }
998a4334 375
d8f6d49f 376 d_socks.insert(*fd);
998a4334 377 d_numsocks++;
d8f6d49f 378 return 0;
4ef015cd
BH
379 }
380
095c3045
BH
381 void returnSocket(int fd)
382 {
383 socks_t::iterator i=d_socks.find(fd);
34801ab1 384 if(i==d_socks.end()) {
335da0ba 385 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 386 }
bb4bdbaf 387 returnSocketLocked(i);
095c3045
BH
388 }
389
4ef015cd 390 // return a socket to the pool, or simply erase it
bb4bdbaf 391 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 392 {
600fc20b 393 if(i==d_socks.end()) {
3f81d239 394 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 395 }
80baf329 396 try {
bb4bdbaf 397 t_fdm->removeReadFD(*i);
80baf329
BH
398 }
399 catch(FDMultiplexerException& e) {
bb4bdbaf 400 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 401 }
3897b9e1 402 closesocket(*i);
3ddb9247 403
998a4334
BH
404 d_socks.erase(i++);
405 --d_numsocks;
4ef015cd 406 }
d8f6d49f
BH
407
408 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 409 static int makeClientSocket(int family)
d8f6d49f 410 {
a903b39c 411 int ret=(int)socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 412
d8f6d49f
BH
413 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
414 return ret;
3ddb9247
PD
415
416 if(ret<0)
335da0ba 417 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 418
3897b9e1 419 setCloseOnExec(ret);
5a38281c 420
d8f6d49f 421 int tries=10;
3aa91c3e 422 ComboAddress sin;
d8f6d49f 423 while(--tries) {
1652a63e 424 uint16_t port;
3ddb9247 425
d8f6d49f 426 if(tries==1) // fall back to kernel 'random'
4957a608 427 port = 0;
1652a63e
BH
428 else
429 port = 1025 + dns_random(64510);
5a38281c 430
3aa91c3e 431 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 432
3ddb9247 433 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 434 break;
d8f6d49f
BH
435 }
436 if(!tries)
3aa91c3e 437 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 438
3897b9e1 439 setNonBlocking(ret);
d8f6d49f
BH
440 return ret;
441 }
49a699c4
BH
442};
443
444static __thread UDPClientSocks* t_udpclientsocks;
4ef015cd 445
288f4aa9 446/* these two functions are used by LWRes */
34801ab1 447// -2 is OS error, -1 is error that depends on the remote, > 0 is success
3ddb9247
PD
448int asendto(const char *data, int len, int flags,
449 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 450{
34801ab1
BH
451
452 PacketID pident;
787e5eab
BH
453 pident.domain = domain;
454 pident.remote = toaddr;
455 pident.type = qtype;
34801ab1
BH
456
457 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
458 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
459
460 for(; chain.first != chain.second; chain.first++) {
461 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 462 /*
4665c31e
BH
463 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
464 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 465 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 466 */
34801ab1
BH
467 chain.first->key.chain.insert(id); // we can chain
468 *fd=-1; // gets used in waitEvent / sendEvent later on
469 return 1;
470 }
471 }
472
49a699c4 473 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
474 if(ret < 0)
475 return ret;
34801ab1 476
998a4334
BH
477 pident.fd=*fd;
478 pident.id=id;
3ddb9247 479
bb4bdbaf
BH
480 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
481 ret = send(*fd, data, len, 0);
482
5b0ddd18 483 int tmp = errno;
bb4bdbaf 484
7302ed0a 485 if(ret < 0)
49a699c4 486 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 487
5b0ddd18 488 errno = tmp; // this is for logging purposes only
7302ed0a 489 return ret;
288f4aa9
BH
490}
491
9170fbaf 492// -1 is error, 0 is timeout, 1 is success
3ddb9247 493int arecvfrom(char *data, int len, int flags, const ComboAddress& fromaddr, int *d_len,
c5c066bf 494 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 495{
0d5f0a9f 496 static optional<unsigned int> nearMissLimit;
3ddb9247 497 if(!nearMissLimit)
0d5f0a9f
BH
498 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
499
288f4aa9 500 PacketID pident;
4ef015cd 501 pident.fd=fd;
288f4aa9 502 pident.id=id;
0d5f0a9f 503 pident.domain=domain;
787e5eab 504 pident.type = qtype;
996c89cc 505 pident.remote=fromaddr;
b636533b 506
288f4aa9 507 string packet;
5b0ddd18 508 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 509
9170fbaf 510 if(ret > 0) {
996c89cc 511 if(packet.empty()) // means "error"
3ddb9247 512 return -1;
998a4334 513
705f31ae 514 *d_len=(int)packet.size();
9170fbaf 515 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 516 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 517 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 518 g_stats.spoofCount++;
35ce8576
BH
519 return -1;
520 }
288f4aa9 521 }
09e6702a 522 else {
34801ab1 523 if(fd >= 0)
49a699c4 524 t_udpclientsocks->returnSocket(fd);
09e6702a 525 }
9170fbaf 526 return ret;
288f4aa9
BH
527}
528
aa4e4cbf 529
87a5ea63 530string s_pidfname;
88def049
BH
531static void writePid(void)
532{
191f2e47 533 if(!::arg().mustDo("write-pid"))
534 return;
18e7758c 535 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 536 if(of)
705f31ae 537 of<< Utility::getpid() <<endl;
88def049 538 else
c057bfaa 539 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
540}
541
bd0289fc
BH
542typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
543tcpClientCounts_t __thread* t_tcpClientCounts;
0e9d9ce2 544
cd989c87 545TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
546{
547 ++s_currentConnections;
cd989c87 548 (*t_tcpClientCounts)[d_remote]++;
0e408828 549}
cd989c87
BH
550
551TCPConnection::~TCPConnection()
0e408828 552{
3ddb9247 553 if(closesocket(d_fd) < 0)
cd989c87 554 unixDie("closing socket for TCPConnection");
3ddb9247 555 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 556 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 557 --s_currentConnections;
0e408828 558}
0e9d9ce2 559
3ddb9247 560AtomicCounter TCPConnection::s_currentConnections;
d8f6d49f 561void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 562
92011b8f 563// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
c5c066bf 564void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 565{
92011b8f 566 if(packetsize > 1000 && t_largeanswerremotes)
567 t_largeanswerremotes->push_back(remote);
2cc13433
BH
568 switch(res) {
569 case RCode::ServFail:
92011b8f 570 if(t_servfailremotes) {
571 t_servfailremotes->push_back(remote);
572 if(query) // packet cache
573 t_servfailqueryring->push_back(make_pair(*query, qtype));
574 }
2cc13433
BH
575 g_stats.servFails++;
576 break;
577 case RCode::NXDomain:
578 g_stats.nxDomains++;
579 break;
580 case RCode::NoError:
581 g_stats.noErrors++;
582 break;
583 }
584}
585
a903b39c 586static string makeLoginfo(DNSComboWriter* dc)
587try
588{
c5c066bf 589 return "("+dc->d_mdp.d_qname.toString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
a903b39c 590}
591catch(...)
592{
593 return "Exception making error message for exception";
594}
595
288f4aa9
BH
596void startDoResolve(void *p)
597{
7b1469bb 598 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 599 try {
92011b8f 600 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
601
b18ace73 602 uint32_t maxanswersize= dc->d_tcp ? 65535 : min((uint16_t) 512, g_udpTruncationThreshold);
7f7b8d55 603 EDNSOpts edo;
8e079f3a 604 bool haveEDNS=false;
605 if(getEDNSOpts(dc->d_mdp, &edo)) {
606 if(!dc->d_tcp)
607 maxanswersize = min(edo.d_packetsize, g_udpTruncationThreshold);
608 haveEDNS=true;
10321a98 609 }
e325f20c 610 vector<DNSRecord> ret;
ea634573 611 vector<uint8_t> packet;
b23b8614 612
ad42489c 613 auto luaconfsLocal = g_luaconfs.getLocal();
614
3ddb9247 615 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
616
617 pw.getHeader()->aa=0;
618 pw.getHeader()->ra=1;
c154c8a4 619 pw.getHeader()->qr=1;
bb4bdbaf 620 pw.getHeader()->tc=0;
ea634573 621 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 622 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 623 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 624
904d3219
PD
625 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
626
627 SyncRes sr(dc->d_now);
3457a2a0 628 if(t_pdl) {
629 sr.setLuaEngine(*t_pdl);
4ea94941 630 sr.d_requestor=dc->d_remote;
3457a2a0 631 }
57769f13 632
633 if(pw.getHeader()->cd || edo.d_Z & EDNSOpts::DNSSECOK)
634 sr.d_doDNSSEC=true;
635
904d3219
PD
636 bool tracedQuery=false; // we could consider letting Lua know about this too
637 bool variableAnswer = false;
638
56b4d21b 639 int res;
39ec5d29 640 DNSFilterEngine::Policy dfepol;
641 DNSRecord spoofed;
e661a20b 642 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
643 pw.getHeader()->tc = 1;
644 res = 0;
645 variableAnswer = true;
e661a20b
PD
646 goto sendit;
647 }
648
c5c066bf 649 if(t_traceRegex->get() && (*t_traceRegex)->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
650 sr.setLogMode(SyncRes::Store);
651 tracedQuery=true;
652 }
3ddb9247 653
8f7473d7 654
77499b05 655 if(!g_quiet || tracedQuery)
461df9d2 656 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
8a63d3ce 657 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
c75a6a9e 658
fededf47 659 sr.setId(MT->getTid());
67828389 660 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
661 sr.setCacheOnly();
662
84433b79 663
3ddb9247 664 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
e325f20c 665
ad42489c 666 dfepol = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote);
39ec5d29 667
668 switch(dfepol.d_kind) {
669 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 670 break;
39ec5d29 671 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 672 g_stats.policyDrops++;
673 delete dc;
674 dc=0;
675 return;
39ec5d29 676 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 677 res=RCode::NXDomain;
678 goto haveAnswer;
679
39ec5d29 680 case DNSFilterEngine::PolicyKind::NODATA:
681 res=RCode::NoError;
682 goto haveAnswer;
683
684 case DNSFilterEngine::PolicyKind::Custom:
644dd1da 685 res=RCode::NoError;
39ec5d29 686 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 687 spoofed.d_type=dfepol.d_custom->getType();
3876ee44 688 spoofed.d_ttl = dfepol.d_ttl;
39ec5d29 689 spoofed.d_class = 1;
690 spoofed.d_content = dfepol.d_custom;
589ad24b 691 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 692 ret.push_back(spoofed);
644dd1da 693 goto haveAnswer;
694
39ec5d29 695
696 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 697 if(!dc->d_tcp) {
698 res=RCode::NoError;
699 pw.getHeader()->tc=1;
700 goto haveAnswer;
701 }
702 break;
703 }
704
808c5ef7 705
a6147cd2 706 if(!t_pdl->get() || !(*t_pdl)->preresolve(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer)) {
44971ca0
PD
707 try {
708 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
709 }
710 catch(ImmediateServFailException &e) {
854d44e3 711 if(g_logCommonErrors)
712 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
713 res = RCode::ServFail;
714 }
4485aa35 715
ad42489c 716 dfepol = luaconfsLocal->dfe.getPostPolicy(ret);
39ec5d29 717 switch(dfepol.d_kind) {
718 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 719 break;
39ec5d29 720 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 721 g_stats.policyDrops++;
722 delete dc;
723 dc=0;
724 return;
39ec5d29 725 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 726 ret.clear();
727 res=RCode::NXDomain;
728 goto haveAnswer;
729
39ec5d29 730 case DNSFilterEngine::PolicyKind::NODATA:
644dd1da 731 ret.clear();
732 res=RCode::NoError;
733 goto haveAnswer;
734
39ec5d29 735 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 736 if(!dc->d_tcp) {
737 ret.clear();
738 res=RCode::NoError;
739 pw.getHeader()->tc=1;
740 goto haveAnswer;
741 }
742 break;
39ec5d29 743
744 case DNSFilterEngine::PolicyKind::Custom:
ad42489c 745 ret.clear();
39ec5d29 746 res=RCode::NoError;
747 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 748 spoofed.d_type=dfepol.d_custom->getType();
3876ee44 749 spoofed.d_ttl = dfepol.d_ttl;
39ec5d29 750 spoofed.d_class = 1;
751 spoofed.d_content = dfepol.d_custom;
589ad24b 752 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 753 ret.push_back(spoofed);
754 goto haveAnswer;
644dd1da 755 }
a3e7b735 756
674cf0f6 757 if(t_pdl->get()) {
bd53ea9d 758 if(res == RCode::NoError) {
e325f20c 759 auto i=ret.cbegin();
760 for(; i!= ret.cend(); ++i)
e693ff5a 761 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
232f0877 762 break;
e325f20c 763 if(i == ret.cend())
a6147cd2 764 (*t_pdl)->nodata(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
a3e7b735 765 }
766 else if(res == RCode::NXDomain)
a6147cd2 767 (*t_pdl)->nxdomain(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
644dd1da 768
a3e7b735 769
a6147cd2 770 (*t_pdl)->postresolve(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
93f0da94 771
d2322a5e 772 }
4485aa35 773 }
644dd1da 774 haveAnswer:;
3e8216c8 775 if(res == PolicyDecision::DROP) {
e9c2ad3a 776 g_stats.policyDrops++;
ae7e77ad 777 delete dc;
778 dc=0;
779 return;
3ddb9247 780 }
3e8216c8 781 if(tracedQuery || res == PolicyDecision::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 782 {
85ffbc53
PD
783 string trace(sr.getTrace());
784 if(!trace.empty()) {
785 vector<string> lines;
786 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 787 for(const string& line : lines) {
85ffbc53
PD
788 if(!line.empty())
789 L<<Logger::Warning<< line << endl;
790 }
791 }
792 }
3ddb9247 793
b3f0ed10 794 if(res == PolicyDecision::PASS) { // XXX what does this MEAN? Why servfail on PASS?
0fe1d080
PD
795 pw.getHeader()->rcode=RCode::ServFail;
796 // no commit here, because no record
797 g_stats.servFails++;
798 }
288f4aa9 799 else {
ea634573 800 pw.getHeader()->rcode=res;
92011b8f 801
8e079f3a 802 if(haveEDNS) {
12ce523e 803 if(g_dnssecmode != DNSSECMode::Off && ((edo.d_Z & EDNSOpts::DNSSECOK) || g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog)) {
8e079f3a 804 auto state=validateRecords(ret);
805 if(state == Secure) {
806 pw.getHeader()->ad=1;
807 }
808 else if(state == Insecure) {
809 pw.getHeader()->ad=0;
810 }
811 else if(state == Bogus && !pw.getHeader()->cd) {
12ce523e 812 if(g_dnssecmode == DNSSECMode::ValidateAll || (edo.d_Z & EDNSOpts::DNSSECOK)) {
813 pw.getHeader()->rcode=RCode::ServFail;
814 goto sendit;
815 }
816 else {
817 L<<Logger::Warning<<"Failed to validate "<<dc->d_mdp.d_qname<<" for "<<dc->d_remote.toStringWithPort()<<endl;
818 }
8e079f3a 819 }
b3f0ed10 820 }
821 }
822
8e079f3a 823
824
c154c8a4 825 if(ret.size()) {
92476c8b 826 orderAndShuffle(ret);
ad42489c 827 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
3e61e7f7 828 sort(ret.begin(), ret.end(), *sl);
829 variableAnswer=true;
830 }
8e079f3a 831 }
832 if(haveEDNS) {
833 ret.push_back(makeOpt(edo.d_packetsize, 0, edo.d_Z));
834 }
835
836 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
837 pw.startRecord(i->d_name, i->d_type, i->d_ttl, i->d_class, i->d_place);
838 if(i->d_type != QType::OPT) // their TTL ain't real
839 minTTL = min(minTTL, i->d_ttl);
840 i->d_content->toPacket(pw);
841 if(pw.size() > maxanswersize) {
842 pw.rollback();
843 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
add935a2 844 {
4957a608 845 pw.getHeader()->tc=1;
add935a2
PD
846 pw.truncate();
847 }
8e079f3a 848 goto sendit; // need to jump over pw.commit
849 }
ea634573 850 }
8e079f3a 851 if(ret.size())
852 pw.commit();
288f4aa9 853 }
10321a98 854 sendit:;
b3f0ed10 855
856
79332bff 857 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
92011b8f 858 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
ea634573 859 if(!dc->d_tcp) {
b71b60ee 860 struct msghdr msgh;
861 struct iovec iov;
862 char cbuf[256];
863 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
cbc03320 864 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 865 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
cbc03320 866 } else
579cae19 867 msgh.msg_control=NULL;
cbc03320 868 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
869 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
870
3762e821 871 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
79332bff 872 t_packetCache->insertResponsePacket(string((const char*)&*packet.begin(), packet.size()),
12ce523e 873 (edo.d_Z & EDNSOpts::DNSSECOK), // ponder filtering on dnssecmode here
3ddb9247
PD
874 g_now.tv_sec,
875 min(minTTL,
79332bff 876 (pw.getHeader()->rcode == RCode::ServFail) ? SyncRes::s_packetcacheservfailttl : SyncRes::s_packetcachettl
3ddb9247 877 )
79332bff 878 );
1051f8a9 879 }
3762e821 880 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 881 }
9c495589
BH
882 else {
883 char buf[2];
ea634573
BH
884 buf[0]=packet.size()/256;
885 buf[1]=packet.size()%256;
feccc9fc 886
c038218b 887 Utility::iovec iov[2];
feccc9fc 888
ea634573
BH
889 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
890 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 891
c038218b 892 int ret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 893 bool hadError=true;
feccc9fc 894
3ddb9247 895 if(ret == 0)
18af64a8 896 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
3ddb9247 897 else if(ret < 0 )
18af64a8 898 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
ea634573 899 else if((unsigned int)ret != 2 + packet.size())
18af64a8 900 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
0e9d9ce2 901 else
18af64a8 902 hadError=false;
3ddb9247 903
09e6702a 904 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 905
09e6702a 906 if(hadError) {
18af64a8 907 // no need to remove us from FDM, we weren't there
c36bc97a 908 dc->d_socket = -1;
09e6702a 909 }
a6ae6414 910 else {
cd989c87 911 dc->d_tcpConnection->state=TCPConnection::BYTE0;
18af64a8 912 Utility::gettimeofday(&g_now, 0); // needs to be updated
cd989c87
BH
913 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
914 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
0e9d9ce2 915 }
9c495589 916 }
3ddb9247 917
1d5b3ce6 918 if(!g_quiet) {
461df9d2 919 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 920 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
9de3e034 921 sr.d_totUsec/1000.0<<" ms, "<<
922 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e 923 }
b23b8614 924
3ddb9247 925 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
fe213470
BH
926 float spent=makeFloat(sr.d_now-dc->d_now);
927 if(spent < 0.001)
928 g_stats.answers0_1++;
929 else if(spent < 0.010)
930 g_stats.answers1_10++;
931 else if(spent < 0.1)
932 g_stats.answers10_100++;
933 else if(spent < 1.0)
934 g_stats.answers100_1000++;
935 else
936 g_stats.answersSlow++;
937
574af7ea 938 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 939 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 940 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 941 // no worries, we do this for packet cache hits elsewhere
c6d04bdc 942 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 943 delete dc;
c36bc97a 944 dc=0;
288f4aa9 945 }
3f81d239 946 catch(PDNSException &ae) {
a903b39c 947 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 948 delete dc;
288f4aa9 949 }
7b1469bb 950 catch(MOADNSException& e) {
a903b39c 951 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 952 delete dc;
7b1469bb 953 }
fdbf35ac 954 catch(std::exception& e) {
a903b39c 955 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what()<<endl;
c36bc97a 956 delete dc;
c154c8a4 957 }
288f4aa9 958 catch(...) {
a903b39c 959 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 960 }
3ddb9247 961
ec6eacbc 962 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
963}
964
677e2a46 965void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 966{
2d733c0f 967 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 968 if(processNum >= 0)
335da0ba 969 sockname += "."+std::to_string(processNum);
677e2a46 970 sockname+=".controlsocket";
41f7a068 971 s_rcc.listen(sockname);
3ddb9247 972
387de317
BH
973 int sockowner = -1;
974 int sockgroup = -1;
975
976 if (!::arg().isEmpty("socket-group"))
977 sockgroup=::arg().asGid("socket-group");
978 if (!::arg().isEmpty("socket-owner"))
979 sockowner=::arg().asUid("socket-owner");
3ddb9247 980
f838ad8d
BH
981 if (sockgroup > -1 || sockowner > -1) {
982 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
983 unixDie("Failed to chown control socket");
984 }
985 }
387de317
BH
986
987 // do mode change if socket-mode is given
988 if(!::arg().isEmpty("socket-mode")) {
989 mode_t sockmode=::arg().asMode("socket-mode");
990 chmod(sockname.c_str(), sockmode);
991 }
1d5b3ce6
BH
992}
993
d8f6d49f 994void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 995{
cd989c87 996 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 997
879b3f70 998 if(conn->state==TCPConnection::BYTE0) {
b841314c 999 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 1000 if(bytes==1)
667f7e60 1001 conn->state=TCPConnection::BYTE1;
3ddb9247 1002 if(bytes==2) {
a0aa4f64 1003 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
1004 conn->bytesread=0;
1005 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1006 }
1007 if(!bytes || bytes < 0) {
bb4bdbaf 1008 t_fdm->removeReadFD(fd);
09e6702a
BH
1009 return;
1010 }
1011 }
667f7e60 1012 else if(conn->state==TCPConnection::BYTE1) {
b841314c 1013 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 1014 if(bytes==1) {
667f7e60 1015 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1016 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 1017 conn->bytesread=0;
09e6702a
BH
1018 }
1019 if(!bytes || bytes < 0) {
1020 if(g_logCommonErrors)
cd989c87 1021 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 1022 t_fdm->removeReadFD(fd);
09e6702a
BH
1023 return;
1024 }
1025 }
667f7e60 1026 else if(conn->state==TCPConnection::GETQUESTION) {
b841314c
RG
1027 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
1028 if(!bytes || bytes < 0 || bytes > UINT16_MAX) {
cd989c87 1029 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 1030 t_fdm->removeReadFD(fd);
09e6702a
BH
1031 return;
1032 }
b841314c 1033 conn->bytesread+=(uint16_t)bytes;
667f7e60 1034 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1035 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1036
09e6702a
BH
1037 DNSComboWriter* dc=0;
1038 try {
cd989c87 1039 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
1040 }
1041 catch(MOADNSException &mde) {
3ddb9247 1042 g_stats.clientParseError++;
4957a608 1043 if(g_logCommonErrors)
cd989c87 1044 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 1045 return;
09e6702a 1046 }
cd989c87
BH
1047 dc->d_tcpConnection = conn; // carry the torch
1048 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1049 dc->d_tcp=true;
cd989c87 1050 dc->setRemote(&conn->d_remote);
a6147cd2 1051 ComboAddress dest;
1052 memset(&dest, 0, sizeof(dest));
1053 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1054 socklen_t len = dest.getSocklen();
1055 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1056 dc->setLocal(dest);
1057
879b3f70 1058 if(dc->d_mdp.d_header.qr) {
4957a608 1059 delete dc;
048f5db6 1060 g_stats.ignoredCount++;
4328f463 1061 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
4957a608 1062 return;
879b3f70 1063 }
3abcdab2
PD
1064 if(dc->d_mdp.d_header.opcode) {
1065 delete dc;
048f5db6 1066 g_stats.ignoredCount++;
4328f463 1067 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
3abcdab2
PD
1068 return;
1069 }
09e6702a 1070 else {
4957a608
BH
1071 ++g_stats.qcounter;
1072 ++g_stats.tcpqcounter;
50a5ef72 1073 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1074 return;
09e6702a
BH
1075 }
1076 }
1077 }
1078}
1079
6dcd28c3 1080//! Handle new incoming TCP connection
d8f6d49f 1081void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1082{
37d3f960 1083 ComboAddress addr;
09e6702a 1084 socklen_t addrlen=sizeof(addr);
705f31ae 1085 int newsock=(int)accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1086 if(newsock>=0) {
85c32340
BH
1087 if(MT->numProcesses() > g_maxMThreads) {
1088 g_stats.overCapacityDrops++;
3897b9e1 1089 closesocket(newsock);
85c32340
BH
1090 return;
1091 }
1092
92011b8f 1093 if(t_remotes)
1094 t_remotes->push_back(addr);
49a699c4 1095 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1096 if(!g_quiet)
4957a608 1097 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1098
09e6702a 1099 g_stats.unauthorizedTCP++;
3897b9e1 1100 closesocket(newsock);
09e6702a
BH
1101 return;
1102 }
bd0289fc 1103 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1104 g_stats.tcpClientOverflow++;
3897b9e1 1105 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
09e6702a
BH
1106 return;
1107 }
3ddb9247 1108
3897b9e1 1109 setNonBlocking(newsock);
cd989c87
BH
1110 shared_ptr<TCPConnection> tc(new TCPConnection(newsock, addr));
1111 tc->state=TCPConnection::BYTE0;
3ddb9247 1112
cd989c87 1113 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1114
0bff046b 1115 struct timeval now;
c038218b 1116 Utility::gettimeofday(&now, 0);
cd989c87 1117 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1118 }
1119}
3ddb9247 1120
b71b60ee 1121string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1122{
183eb877 1123 gettimeofday(&g_now, 0);
b71b60ee 1124 struct timeval diff = g_now - tv;
1125 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1126
22cf1fda 1127 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1128 g_stats.tooOldDrops++;
1129 return 0;
1130 }
1131
1bc3c142 1132 ++g_stats.qcounter;
d7f10541
BH
1133 if(fromaddr.sin4.sin_family==AF_INET6)
1134 g_stats.ipv6qcounter++;
1bc3c142
BH
1135
1136 string response;
93f0da94 1137 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1bc3c142
BH
1138 try {
1139 uint32_t age;
8f7473d7 1140#ifdef MALLOC_TRACE
1141 /*
1142 static uint64_t last=0;
1143 if(!last)
1144 g_mtracer->clearAllocators();
1145 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1146 last=g_mtracer->getAllocs();
1147 cout<<g_mtracer->topAllocatorsString()<<endl;
1148 g_mtracer->clearAllocators();
1149 */
1150#endif
12ce523e 1151 bool needsDNSSEC=false;
93f0da94 1152
12ce523e 1153 if(dh->arcount) {
1154 unsigned int consumed=0;
1155 DNSName qname(question.c_str(), question.length(), sizeof(dnsheader), false, 0, 0, &consumed);
1156 if(question.size() > (consumed+12+11) && ((question[consumed+12+11]&0x80)==0x80))
1157 needsDNSSEC=true;
1158 }
1159
1160 if(!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(question, needsDNSSEC, g_now.tv_sec, &response, &age)) {
1bc3c142 1161 if(!g_quiet)
d738f00f 1162 L<<Logger::Notice<<t_id<< " question answered from packet cache from "<<fromaddr.toString()<<endl;
92011b8f 1163 // t_queryring->push_back("packetcached");
3ddb9247 1164
8f7473d7 1165
1166
1bc3c142
BH
1167 g_stats.packetCacheHits++;
1168 SyncRes::s_queries++;
1169 ageDNSPacket(response, age);
b71b60ee 1170 struct msghdr msgh;
1171 struct iovec iov;
1172 char cbuf[256];
1173 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
cbc03320 1174 if(g_fromtosockets.count(fd)) {
fbe2a2e0 1175 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 1176 }
579cae19
PD
1177 else {
1178 msgh.msg_control=NULL;
1179 }
cbc03320 1180 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1181 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 1182
97bee66d
BH
1183 if(response.length() >= sizeof(struct dnsheader)) {
1184 struct dnsheader dh;
1185 memcpy(&dh, response.c_str(), sizeof(dh));
92011b8f 1186 updateResponseStats(dh.rcode, fromaddr, response.length(), 0, 0);
97bee66d 1187 }
08f3f638 1188 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1bc3c142
BH
1189 return 0;
1190 }
3ddb9247 1191 }
1bc3c142
BH
1192 catch(std::exception& e) {
1193 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1194 return 0;
1195 }
3ddb9247 1196
4ea94941 1197 if(t_pdl->get()) {
93f0da94 1198 if((*t_pdl)->ipfilter(fromaddr, destaddr, *dh)) {
4ea94941 1199 if(!g_quiet)
1200 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1201 g_stats.policyDrops++;
1202 return 0;
1203 }
1204 }
1205
1bc3c142 1206 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 1207 if(!g_quiet)
854d44e3 1208 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
461df9d2 1209
1bc3c142
BH
1210 g_stats.overCapacityDrops++;
1211 return 0;
1212 }
3ddb9247 1213
1bc3c142
BH
1214 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1215 dc->setSocket(fd);
1216 dc->setRemote(&fromaddr);
b71b60ee 1217 dc->setLocal(destaddr);
1bc3c142
BH
1218 dc->d_tcp=false;
1219 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1220 return 0;
3ddb9247
PD
1221}
1222
b71b60ee 1223
d8f6d49f 1224void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 1225{
a9af3782 1226 int len;
5db529f8
BH
1227 char data[1500];
1228 ComboAddress fromaddr;
b71b60ee 1229 struct msghdr msgh;
1230 struct iovec iov;
1231 char cbuf[256];
1232
1233 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1234 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1235
3ddb9247 1236 for(;;)
b71b60ee 1237 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
92011b8f 1238 if(t_remotes)
1239 t_remotes->push_back(fromaddr);
b23b8614 1240
49a699c4 1241 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 1242 if(!g_quiet)
4957a608 1243 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1244
5db529f8 1245 g_stats.unauthorizedUDP++;
a9af3782 1246 return;
5db529f8 1247 }
15c01deb 1248 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 1249 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 1250 if(!g_quiet)
81859ba5 1251 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1252
1253 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1254 return;
1255 }
5db529f8 1256 try {
b23b8614 1257 dnsheader* dh=(dnsheader*)data;
3ddb9247 1258
b23b8614 1259 if(dh->qr) {
048f5db6 1260 g_stats.ignoredCount++;
4957a608
BH
1261 if(g_logCommonErrors)
1262 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 1263 }
3abcdab2 1264 else if(dh->opcode) {
048f5db6 1265 g_stats.ignoredCount++;
3abcdab2
PD
1266 if(g_logCommonErrors)
1267 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1268 }
5db529f8 1269 else {
232f0877 1270 string question(data, len);
b71b60ee 1271 struct timeval tv={0,0};
1272 HarvestTimestamp(&msgh, &tv);
1273 ComboAddress dest;
1274 memset(&dest, 0, sizeof(dest)); // this makes sure we igore this address if not returned by recvmsg above
a6147cd2 1275 auto loc = rplookup(g_listenSocketsAddresses, fd);
1276 if(HarvestDestinationAddress(&msgh, &dest)) {
1277 // but.. need to get port too
1278 if(loc)
1279 dest.sin4.sin_port = loc->sin4.sin_port;
1280 }
1281 else {
1282 if(loc) {
1283 dest = *loc;
1284 }
1285 else {
1286 dest.sin4.sin_family = fromaddr.sin4.sin_family;
1287 socklen_t len = dest.getSocklen();
1288 getsockname(fd, (sockaddr*)&dest, &len); // if this fails, we're ok with it
1289 }
1290 }
232f0877 1291 if(g_weDistributeQueries)
b71b60ee 1292 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
232f0877 1293 else
b71b60ee 1294 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
5db529f8
BH
1295 }
1296 }
1297 catch(MOADNSException& mde) {
3ddb9247 1298 g_stats.clientParseError++;
84e66a59 1299 if(g_logCommonErrors)
4957a608 1300 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 1301 }
0b602819
KM
1302 catch(std::runtime_error& e) {
1303 g_stats.clientParseError++;
1304 if(g_logCommonErrors)
1305 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
1306 }
1307 }
ac0e821b
BH
1308 else {
1309 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
3ddb9247 1310 if(errno == EAGAIN)
9326cae1 1311 g_stats.noPacketError++;
bf3b0cec 1312 break;
ac0e821b 1313 }
5db529f8
BH
1314}
1315
1bc3c142 1316
5db529f8
BH
1317typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
1318deferredAdd_t deferredAdd;
1319
f28307ad 1320void makeTCPServerSockets()
9c495589 1321{
37d3f960 1322 int fd;
f28307ad 1323 vector<string>locals;
2e3d8a19 1324 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1325
f28307ad 1326 if(locals.empty())
3f81d239 1327 throw PDNSException("No local address specified");
3ddb9247 1328
f28307ad 1329 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1330 ServiceTuple st;
1331 st.port=::arg().asNum("local-port");
1332 parseService(*i, st);
3ddb9247 1333
32252594
BH
1334 ComboAddress sin;
1335
f28307ad 1336 memset((char *)&sin,0, sizeof(sin));
37d3f960 1337 sin.sin4.sin_family = AF_INET;
32252594 1338 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1339 sin.sin6.sin6_family = AF_INET6;
f71bc087 1340 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1341 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1342 }
1343
1344 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 1345 if(fd<0)
3f81d239 1346 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 1347
3897b9e1 1348 setCloseOnExec(fd);
a903b39c 1349
f28307ad 1350 int tmp=1;
37d3f960 1351 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
f28307ad 1352 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1353 exit(1);
f28307ad 1354 }
0dfa94ab 1355 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1356 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1357 }
1358
c8ddb7c2 1359#ifdef TCP_DEFER_ACCEPT
37d3f960
BH
1360 if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
1361 if(i==locals.begin())
4957a608 1362 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1363 }
1364#endif
1365
fec7dd5a
SS
1366 if( ::arg().mustDo("non-local-bind") )
1367 Utility::setBindAny(AF_INET, fd);
1368
32252594 1369 sin.sin4.sin_port = htons(st.port);
37d3f960 1370 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1371 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1372 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 1373
3897b9e1 1374 setNonBlocking(fd);
49a699c4 1375 setSocketSendBuffer(fd, 65000);
37d3f960 1376 listen(fd, 128);
5db529f8 1377 deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1378 g_tcpListenSockets.push_back(fd);
84433b79 1379 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1380 // - fd is not that which we know here, but returned from accept()
3ddb9247 1381 if(sin.sin4.sin_family == AF_INET)
32252594 1382 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1383 else
32252594 1384 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1385 }
9c495589
BH
1386}
1387
f28307ad 1388void makeUDPServerSockets()
288f4aa9 1389{
fec7dd5a 1390 int one=1;
f28307ad 1391 vector<string>locals;
2e3d8a19 1392 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1393
f28307ad 1394 if(locals.empty())
3f81d239 1395 throw PDNSException("No local address specified");
3ddb9247 1396
f28307ad 1397 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1398 ServiceTuple st;
1399 st.port=::arg().asNum("local-port");
1400 parseService(*i, st);
1401
37d3f960 1402 ComboAddress sin;
996c89cc 1403
37d3f960
BH
1404 memset(&sin, 0, sizeof(sin));
1405 sin.sin4.sin_family = AF_INET;
32252594 1406 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1407 sin.sin6.sin6_family = AF_INET6;
f71bc087 1408 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1409 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 1410 }
3ddb9247 1411
bb4bdbaf 1412 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 1413 if(fd < 0) {
3f81d239 1414 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1415 }
915b0c39
AT
1416 if (!setSocketTimestamps(fd))
1417 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 1418
b71b60ee 1419 if(IsAnyAddress(sin)) {
cbc03320 1420 if(sin.sin4.sin_family == AF_INET)
1421 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
1422 g_fromtosockets.insert(fd);
757d3179 1423#ifdef IPV6_RECVPKTINFO
cbc03320 1424 if(sin.sin4.sin_family == AF_INET6)
1425 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
1426 g_fromtosockets.insert(fd);
757d3179 1427#endif
0dfa94ab 1428 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
1429 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1430 }
b71b60ee 1431 }
fec7dd5a
SS
1432 if( ::arg().mustDo("non-local-bind") )
1433 Utility::setBindAny(AF_INET6, fd);
1434
3897b9e1 1435 setCloseOnExec(fd);
a903b39c 1436
4e9a20e6 1437 setSocketReceiveBuffer(fd, 250000);
32252594 1438 sin.sin4.sin_port = htons(st.port);
37d3f960 1439
a6147cd2 1440 int socklen=sin.getSocklen();
1441
3ddb9247 1442 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 1443 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 1444
3897b9e1 1445 setNonBlocking(fd);
c2136bf0 1446
0aaecd50 1447 deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 1448 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 1449 if(sin.sin4.sin_family == AF_INET)
32252594 1450 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1451 else
32252594 1452 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1453 }
c836dc19 1454}
caa6eefa 1455
9c495589 1456
c836dc19
BH
1457void daemonize(void)
1458{
1459 if(fork())
1460 exit(0); // bye bye
3ddb9247
PD
1461
1462 setsid();
c836dc19 1463
27a5ead5 1464 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 1465 if(i < 0)
27a5ead5
BH
1466 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1467 else {
1468 dup2(i,0); /* stdin */
1469 dup2(i,1); /* stderr */
1470 dup2(i,2); /* stderr */
1471 close(i);
1472 }
288f4aa9 1473}
caa6eefa 1474
cc59bce6 1475AtomicCounter counter;
c75a6a9e
BH
1476bool statsWanted;
1477
1478void usr1Handler(int)
1479{
1480 statsWanted=true;
1481}
ae1b2e98 1482
9170fbaf
BH
1483void usr2Handler(int)
1484{
f1f34cc2 1485 g_quiet= !g_quiet;
1486 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
1487 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
1488}
1489
c75a6a9e
BH
1490void doStats(void)
1491{
16beeaa4
BH
1492 static time_t lastOutputTime;
1493 static uint64_t lastQueryCount;
d299d4f5 1494
1495 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
1496 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 1497
d299d4f5 1498 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
3427fa8a
BH
1499 L<<Logger::Warning<<"stats: "<<g_stats.qcounter<<" questions, "<<
1500 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
1501 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
1502 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
1503
3427fa8a
BH
1504 L<<Logger::Warning<<"stats: throttle map: "
1505 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 1506 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
70c2c8b1
BH
1507 L<<Logger::Warning<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1508 L<<Logger::Warning<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 1509 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
3427fa8a
BH
1510 L<<Logger::Warning<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
1511 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 1512
16beeaa4
BH
1513 //L<<Logger::Warning<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
1514 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 1515
16beeaa4
BH
1516 L<<Logger::Warning<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
1517 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 1518
16beeaa4
BH
1519 time_t now = time(0);
1520 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
1521 L<<Logger::Warning<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
1522 }
1523 lastOutputTime = now;
1524 lastQueryCount = SyncRes::s_queries;
c75a6a9e 1525 }
3ddb9247 1526 else if(statsWanted)
70c2c8b1 1527 L<<Logger::Warning<<"stats: no stats yet!"<<endl;
7becf07f 1528
c75a6a9e
BH
1529 statsWanted=false;
1530}
c836dc19 1531
29f0b1ce 1532static void houseKeeping(void *)
c836dc19 1533{
d67620e4 1534 static __thread time_t last_stat, last_rootupdate, last_prune, last_secpoll;
8baca3fa 1535 static __thread int cleanCounter=0;
cc59bce6 1536 static __thread bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
1537 try {
1538 if(s_running)
1539 return;
1540 s_running=true;
3ddb9247 1541
cc59bce6 1542 struct timeval now;
1543 Utility::gettimeofday(&now, 0);
3ddb9247
PD
1544
1545 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 1546 DTime dt;
1547 dt.setTimeval(now);
1548 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
f8f243b0 1549 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numWorkerThreads);
3ddb9247 1550
f8f243b0 1551 pruneCollection(t_sstorage->negcache, ::arg().asNum("max-cache-entries") / (g_numWorkerThreads * 10), 200);
3ddb9247 1552
cc59bce6 1553 if(!((cleanCounter++)%40)) { // this is a full scan!
1554 time_t limit=now.tv_sec-300;
1555 for(SyncRes::nsspeeds_t::iterator i = t_sstorage->nsSpeeds.begin() ; i!= t_sstorage->nsSpeeds.end(); )
1556 if(i->second.stale(limit))
1557 t_sstorage->nsSpeeds.erase(i++);
1558 else
1559 ++i;
1560 }
1561 last_prune=time(0);
d67620e4 1562 }
3ddb9247 1563
cc59bce6 1564 if(now.tv_sec - last_rootupdate > 7200) {
1565 SyncRes sr(now);
1566 sr.setDoEDNS0(true);
e325f20c 1567 vector<DNSRecord> ret;
3ddb9247 1568
cc59bce6 1569 sr.setNoCache();
1570 int res=-1;
18b73338 1571 try {
6ed9a611 1572 res=sr.beginResolve(DNSName("."), QType(QType::NS), 1, ret);
cc59bce6 1573 }
3aa91c3e 1574 catch(PDNSException& e)
1575 {
1576 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
1577 }
1578
1579 catch(std::exception& e)
1580 {
1581 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
1582 }
1583
cc59bce6 1584 catch(...)
1585 {
1586 L<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
1587 }
1588 if(!res) {
1589 L<<Logger::Notice<<"Refreshed . records"<<endl;
1590 last_rootupdate=now.tv_sec;
1591 }
1592 else
1593 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1594 }
3ddb9247 1595
cc59bce6 1596 if(!t_id) {
3ddb9247 1597 if(now.tv_sec - last_stat >= 1800) {
cc59bce6 1598 doStats();
1599 last_stat=time(0);
1600 }
3ddb9247 1601
cc59bce6 1602 if(now.tv_sec - last_secpoll >= 3600) {
1603 try {
1604 doSecPoll(&last_secpoll);
1605 }
1606 catch(...) {}
18b73338 1607 }
d67620e4 1608 }
cc59bce6 1609 s_running=false;
d67620e4 1610 }
cc59bce6 1611 catch(PDNSException& ae)
1612 {
1613 s_running=false;
1614 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
1615 throw;
1616 }
779828c4 1617}
d6d5dea7 1618
49a699c4
BH
1619void makeThreadPipes()
1620{
c3828c03 1621 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
1622 struct ThreadPipeSet tps;
1623 int fd[2];
1624 if(pipe(fd) < 0)
1625 unixDie("Creating pipe for inter-thread communications");
3ddb9247 1626
49a699c4
BH
1627 tps.readToThread = fd[0];
1628 tps.writeToThread = fd[1];
3ddb9247 1629
49a699c4
BH
1630 if(pipe(fd) < 0)
1631 unixDie("Creating pipe for inter-thread communications");
1632 tps.readFromThread = fd[0];
1633 tps.writeFromThread = fd[1];
3ddb9247 1634
49a699c4
BH
1635 g_pipes.push_back(tps);
1636 }
1637}
1638
00c9b8c1
BH
1639struct ThreadMSG
1640{
1641 pipefunc_t func;
1642 bool wantAnswer;
1643};
1644
49a699c4
BH
1645void broadcastFunction(const pipefunc_t& func, bool skipSelf)
1646{
49a699c4 1647 unsigned int n = 0;
1dc8f4d0 1648 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
1649 {
1650 if(n++ == t_id) {
1651 if(!skipSelf)
1652 func(); // don't write to ourselves!
1653 continue;
1654 }
3ddb9247 1655
00c9b8c1
BH
1656 ThreadMSG* tmsg = new ThreadMSG();
1657 tmsg->func = func;
1658 tmsg->wantAnswer = true;
b841314c
RG
1659 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
1660 delete tmsg;
49a699c4 1661 unixDie("write to thread pipe returned wrong size or error");
b841314c 1662 }
3ddb9247 1663
49a699c4
BH
1664 string* resp;
1665 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1666 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 1667
49a699c4
BH
1668 if(resp) {
1669// cerr <<"got response: " << *resp << endl;
1670 delete resp;
1671 }
1672 }
1673}
06ea9015 1674
2fafb640 1675static uint32_t g_disthashseed;
8171ab83 1676void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 1677{
8171ab83 1678 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 1679 unsigned int target = 1 + (hash % (g_pipes.size()-1));
1680
00c9b8c1
BH
1681 if(target == t_id) {
1682 func();
1683 return;
1684 }
3ddb9247 1685 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
1686 ThreadMSG* tmsg = new ThreadMSG();
1687 tmsg->func = func;
1688 tmsg->wantAnswer = false;
3ddb9247 1689
b841314c
RG
1690 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
1691 delete tmsg;
3ddb9247 1692 unixDie("write to thread pipe returned wrong size or error");
b841314c 1693 }
00c9b8c1 1694}
3427fa8a 1695
49a699c4
BH
1696void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
1697{
00c9b8c1 1698 ThreadMSG* tmsg;
3ddb9247
PD
1699
1700 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
1701 unixDie("read from thread pipe returned wrong size or error");
1702 }
3ddb9247 1703
2f22827a 1704 void *resp=0;
1705 try {
1706 resp = tmsg->func();
1707 }
1708 catch(std::exception& e) {
1709 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
1710 }
1711 catch(PDNSException& e) {
1712 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
1713 }
00c9b8c1
BH
1714 if(tmsg->wantAnswer)
1715 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp))
1716 unixDie("write to thread pipe returned wrong size or error");
3ddb9247 1717
00c9b8c1 1718 delete tmsg;
49a699c4 1719}
09e6702a 1720
13034931
BH
1721template<class T> void *voider(const boost::function<T*()>& func)
1722{
1723 return func();
1724}
1725
b3b5459d
BH
1726vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
1727{
1728 a.insert(a.end(), b.begin(), b.end());
1729 return a;
1730}
1731
// Appends every (string, number) pair of b to the end of a and returns a.
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  std::vector<std::pair<std::string, uint16_t> >::const_iterator first = b.begin();
  std::vector<std::pair<std::string, uint16_t> >::const_iterator last = b.end();
  a.insert(a.end(), first, last);
  return a;
}
1737
3ddb9247
PD
1738vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
1739{
1740 a.insert(a.end(), b.begin(), b.end());
1741 return a;
1742}
1743
92011b8f 1744
13034931 1745template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
1746{
1747 unsigned int n = 0;
1748 T ret=T();
1dc8f4d0 1749 for(ThreadPipeSet& tps : g_pipes)
3427fa8a
BH
1750 {
1751 if(n++ == t_id) {
1752 if(!skipSelf) {
1753 T* resp = (T*)func(); // don't write to ourselves!
1754 if(resp) {
1755 //~ cerr <<"got direct: " << *resp << endl;
1756 ret += *resp;
1757 delete resp;
1758 }
1759 }
1760 continue;
1761 }
3ddb9247 1762
00c9b8c1
BH
1763 ThreadMSG* tmsg = new ThreadMSG();
1764 tmsg->func = boost::bind(voider<T>, func);
1765 tmsg->wantAnswer = true;
3ddb9247 1766
b841314c
RG
1767 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
1768 delete tmsg;
3427fa8a 1769 unixDie("write to thread pipe returned wrong size or error");
b841314c 1770 }
3ddb9247 1771
3427fa8a
BH
1772 T* resp;
1773 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1774 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 1775
3427fa8a
BH
1776 if(resp) {
1777 //~ cerr <<"got response: " << *resp << endl;
1778 ret += *resp;
1779 delete resp;
1780 }
1781 }
1782 return ret;
1783}
1784
13034931
BH
1785template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
1786template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
b3b5459d 1787template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
3ddb9247 1788template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 1789
d8f6d49f 1790void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
1791{
1792 string remote;
1793 string msg=s_rcc.recv(&remote);
1794 RecursorControlParser rcp;
1795 RecursorControlParser::func_t* command;
3ddb9247 1796
09e6702a 1797 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
1798
1799 // If we are inside a chroot, we need to strip
1800 if (!arg()["chroot"].empty()) {
1801 int len = arg()["chroot"].length();
1802 remote = remote.substr(len);
1803 }
1804
ab5c053d
BH
1805 try {
1806 s_rcc.send(answer, &remote);
1807 command();
1808 }
fdbf35ac 1809 catch(std::exception& e) {
ab5c053d
BH
1810 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
1811 }
3f81d239 1812 catch(PDNSException& ae) {
ab5c053d
BH
1813 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
1814 }
09e6702a
BH
1815}
1816
d8f6d49f 1817void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1818{
0b18b22e 1819 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 1820 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 1821
667f7e60 1822 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 1823
705f31ae 1824 int ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 1825 if(ret > 0) {
667f7e60
BH
1826 pident->inMSG.append(&buffer[0], &buffer[ret]);
1827 pident->inNeeded-=ret;
825fa717 1828 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
1829 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
1830 PacketID pid=*pident;
1831 string msg=pident->inMSG;
3ddb9247 1832
bb4bdbaf 1833 t_fdm->removeReadFD(fd);
3ddb9247 1834 MT->sendEvent(pid, &msg);
09e6702a
BH
1835 }
1836 else {
667f7e60 1837 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
1838 }
1839 }
1840 else {
667f7e60 1841 PacketID tmp=*pident;
bb4bdbaf 1842 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
1843 string empty;
1844 MT->sendEvent(tmp, &empty); // this conveys error status
1845 }
1846}
1847
d8f6d49f 1848void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1849{
0b18b22e 1850 PacketID* pid=any_cast<PacketID>(&var);
4ca15bca 1851 int ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 1852 if(ret > 0) {
667f7e60
BH
1853 pid->outPos+=ret;
1854 if(pid->outPos==pid->outMSG.size()) {
1855 PacketID tmp=*pid;
bb4bdbaf 1856 t_fdm->removeWriteFD(fd);
09e6702a
BH
1857 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
1858 }
1859 }
1860 else { // error or EOF
667f7e60 1861 PacketID tmp(*pid);
bb4bdbaf 1862 t_fdm->removeWriteFD(fd);
09e6702a 1863 string sent;
998a4334 1864 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
1865 }
1866}
1867
34801ab1
BH
1868// resend event to everybody chained onto it
1869void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
1870{
1871 if(iter->key.chain.empty())
1872 return;
e27e91a8 1873 // cerr<<"doResends called!\n";
34801ab1
BH
1874 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
1875 resend.fd=-1;
1876 resend.id=*i;
e27e91a8 1877 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 1878
34801ab1
BH
1879 MT->sendEvent(resend, &content);
1880 g_stats.chainResends++;
34801ab1
BH
1881 }
1882}
1883
d8f6d49f 1884void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1885{
600fc20b 1886 PacketID pid=any_cast<PacketID>(var);
998a4334 1887 int len;
e45beeda 1888 char data[g_outgoingEDNSBufsize];
996c89cc 1889 ComboAddress fromaddr;
09e6702a
BH
1890 socklen_t addrlen=sizeof(fromaddr);
1891
998a4334 1892 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 1893
998a4334
BH
1894 if(len < (int)sizeof(dnsheader)) {
1895 if(len < 0)
996c89cc 1896 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 1897 else {
3ddb9247 1898 g_stats.serverParseError++;
09e6702a 1899 if(g_logCommonErrors)
85db02c5 1900 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 1901 ": packet smaller than DNS header"<<endl;
998a4334 1902 }
34801ab1 1903
49a699c4 1904 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
1905 string empty;
1906
1907 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 1908 if(iter != MT->d_waiters.end())
34801ab1 1909 doResends(iter, pid, empty);
3ddb9247 1910
34801ab1 1911 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 1912 return;
3ddb9247 1913 }
998a4334
BH
1914
1915 dnsheader dh;
1916 memcpy(&dh, data, sizeof(dh));
3ddb9247 1917
6da3b3ad
PD
1918 PacketID pident;
1919 pident.remote=fromaddr;
1920 pident.id=dh.id;
1921 pident.fd=fd;
34801ab1 1922
33a928af 1923 if(!dh.qr && g_logCommonErrors) {
854d44e3 1924 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
1925 }
1926
1927 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
1928 !dh.qr) { // one weird server
1929 pident.domain.clear();
1930 pident.type = 0;
1931 }
1932 else {
1933 try {
8171ab83 1934 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
1935 }
1936 catch(std::exception& e) {
1937 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
1938 L<<Logger::Warning<<"Error in packet from "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
1939 return;
34801ab1 1940 }
6da3b3ad
PD
1941 }
1942 string packet;
1943 packet.assign(data, len);
34801ab1 1944
6da3b3ad
PD
1945 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
1946 if(iter != MT->d_waiters.end()) {
1947 doResends(iter, pident, packet);
1948 }
c1da7976 1949
6da3b3ad 1950retryWithName:
4957a608 1951
6da3b3ad
PD
1952 if(!MT->sendEvent(pident, &packet)) {
1953 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
1954 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
1955 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 1956 pident.domain == mthread->key.domain) {
6da3b3ad 1957 mthread->key.nearMisses++;
998a4334 1958 }
6da3b3ad
PD
1959
1960 // be a bit paranoid here since we're weakening our matching
3ddb9247 1961 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
1962 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
1963 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
1964 pident.domain = mthread->key.domain;
1965 pident.type = mthread->key.type;
1966 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 1967 }
09e6702a 1968 }
6da3b3ad
PD
1969 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
1970 if(g_logCommonErrors) {
1971 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<<pident.domain<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 1972 }
09e6702a 1973 }
6da3b3ad
PD
1974 else if(fd >= 0) {
1975 t_udpclientsocks->returnSocket(fd);
1976 }
09e6702a
BH
1977}
1978
1f4abb20
BH
1979FDMultiplexer* getMultiplexer()
1980{
1981 FDMultiplexer* ret;
1982 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
1983 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
1984 try {
1985 ret=i->second();
1f4abb20
BH
1986 return ret;
1987 }
98d0ee4a 1988 catch(FDMultiplexerException &fe) {
0a7f24cb 1989 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
1990 }
1991 catch(...) {
1992 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
1993 }
1f4abb20
BH
1994 }
1995 L<<Logger::Error<<"No working multiplexer found!"<<endl;
1996 exit(1);
1997}
1998
3ddb9247 1999
0f39c1a3 2000string* doReloadLuaScript()
4485aa35 2001{
674cf0f6 2002 string fname= ::arg()["lua-dns-script"];
4485aa35 2003 try {
674cf0f6
BH
2004 if(fname.empty()) {
2005 t_pdl->reset();
2006 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 2007 return new string("unloaded\n");
4485aa35
BH
2008 }
2009 else {
a3e7b735 2010 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(fname));
4485aa35
BH
2011 }
2012 }
fdbf35ac 2013 catch(std::exception& e) {
674cf0f6 2014 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 2015 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 2016 }
3ddb9247 2017
674cf0f6 2018 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 2019 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
2020}
2021
49a699c4
BH
2022string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2023{
3ddb9247 2024 if(begin != end)
49a699c4 2025 ::arg().set("lua-dns-script") = *begin;
3ddb9247 2026
0f39c1a3 2027 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 2028}
49a699c4 2029
77499b05
BH
2030string* pleaseUseNewTraceRegex(const std::string& newRegex)
2031try
2032{
2033 if(newRegex.empty()) {
2034 t_traceRegex->reset();
2035 return new string("unset\n");
2036 }
2037 else {
2038 (*t_traceRegex) = shared_ptr<Regex>(new Regex(newRegex));
2039 return new string("ok\n");
2040 }
2041}
3f81d239 2042catch(PDNSException& ae)
77499b05
BH
2043{
2044 return new string(ae.reason+"\n");
2045}
2046
2047string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2048{
2049 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2050}
2051
4e9a20e6 2052static void checkLinuxIPv6Limits()
2053{
2054#ifdef __linux__
2055 string line;
2056 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 2057 int lim=std::stoi(line);
4e9a20e6 2058 if(lim < 16384) {
36849ff2 2059 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 2060 }
2061 }
2062#endif
2063}
36849ff2 2064static void checkOrFixFDS()
4e9a20e6 2065{
c0063e60 2066 unsigned int availFDs=getFilenumLimit();
2067 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2068
2069 if(wantFDs > availFDs) {
067ad20e 2070 unsigned int hardlimit= getFilenumLimit(true);
2071 if(hardlimit >= wantFDs) {
c0063e60 2072 setFilenumLimit(wantFDs);
2073 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 2074 }
2075 else {
067ad20e 2076 int newval = (hardlimit - 25) / g_numWorkerThreads;
2077 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 2078 g_maxMThreads = newval;
067ad20e 2079 setFilenumLimit(hardlimit);
36849ff2 2080 }
2081 }
4e9a20e6 2082}
77499b05 2083
bb4bdbaf 2084void* recursorThread(void*);
51e2144e 2085
3427fa8a 2086void* pleaseSupplantACLs(NetmaskGroup *ng)
49a699c4
BH
2087{
2088 t_allowFrom = ng;
3427fa8a 2089 return 0;
49a699c4
BH
2090}
2091
dbd23fc2
BH
2092int g_argc;
2093char** g_argv;
2094
18af64a8 2095void parseACLs()
f7c1d4e3 2096{
18af64a8 2097 static bool l_initialized;
3ddb9247 2098
49a699c4 2099 if(l_initialized) { // only reload configuration file on second call
18af64a8
BH
2100 string configname=::arg()["config-dir"]+"/recursor.conf";
2101 cleanSlashes(configname);
3ddb9247
PD
2102
2103 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 2104 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 2105 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 2106 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
2107 ::arg().preParse(g_argc, g_argv, "include-dir");
2108
2109 // then process includes
2110 std::vector<std::string> extraConfigs;
242b90e1
AT
2111 ::arg().gatherIncludes(extraConfigs);
2112
1dc8f4d0 2113 for(const std::string& fn : extraConfigs) {
7e818521 2114 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2115 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2116 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2117 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2118 }
ca2c884c
AT
2119
2120 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2121 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2122 }
49a699c4
BH
2123
2124 NetmaskGroup* oldAllowFrom = t_allowFrom, *allowFrom=new NetmaskGroup;
3ddb9247 2125
2c95fc65
BH
2126 if(!::arg()["allow-from-file"].empty()) {
2127 string line;
2c95fc65
BH
2128 ifstream ifs(::arg()["allow-from-file"].c_str());
2129 if(!ifs) {
3ddb9247 2130 delete allowFrom;
9c61b9d0 2131 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2132 }
2133
2134 string::size_type pos;
2135 while(getline(ifs,line)) {
2136 pos=line.find('#');
2137 if(pos!=string::npos)
2138 line.resize(pos);
2139 trim(line);
2140 if(line.empty())
2141 continue;
2142
18af64a8 2143 allowFrom->addMask(line);
2c95fc65 2144 }
49a699c4 2145 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2146 }
2147 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2148 vector<string> ips;
2149 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2150
f7c1d4e3
BH
2151 L<<Logger::Warning<<"Only allowing queries from: ";
2152 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2153 allowFrom->addMask(*i);
f7c1d4e3 2154 if(i!=ips.begin())
674cf0f6 2155 L<<Logger::Warning<<", ";
f7c1d4e3
BH
2156 L<<Logger::Warning<<*i;
2157 }
2158 L<<Logger::Warning<<endl;
2159 }
49a699c4 2160 else {
3ddb9247 2161 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
49a699c4
BH
2162 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2163 delete allowFrom;
2164 allowFrom = 0;
2165 }
3ddb9247 2166
49a699c4 2167 g_initialAllowFrom = allowFrom;
d7dae798 2168 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
49a699c4 2169 delete oldAllowFrom;
3ddb9247 2170
49a699c4 2171 l_initialized = true;
18af64a8
BH
2172}
2173
795215f2 2174boost::optional<Netmask> getEDNSSubnetMask(const ComboAddress& local, const DNSName&dn, const ComboAddress& rem)
2175{
2176 if(local.sin4.sin_family != AF_INET || local.sin4.sin_addr.s_addr) { // detect unset 'requestor'
2177 if(g_ednsdomains.check(dn) || g_ednssubnets.match(rem)) {
2178 int bits =local.sin4.sin_family == AF_INET ? 24 : 64;
2179 ComboAddress trunc(local);
2180 trunc.truncate(bits);
2181 return boost::optional<Netmask>(Netmask(trunc, bits));
2182 }
2183 }
2184 return boost::optional<Netmask>();
2185}
2186
2187void parseEDNSSubnetWhitelist(const std::string& wlist)
2188{
2189 vector<string> parts;
39588f55 2190 stringtok(parts, wlist, ",; ");
795215f2 2191 for(const auto& a : parts) {
2192 try {
2193 Netmask nm(a);
2194 g_ednssubnets.addMask(nm);
2195 }
2196 catch(...) {
2197 g_ednsdomains.add(DNSName(a));
2198 }
2199 }
2200}
2201
756e82cf 2202SuffixMatchNode g_delegationOnly;
2203static void setupDelegationOnly()
2204{
2205 vector<string> parts;
2206 stringtok(parts, ::arg()["delegation-only"], ", \t");
2207 for(const auto& p : parts) {
2208 g_delegationOnly.add(DNSName(p));
2209 }
2210}
795215f2 2211
18af64a8
BH
2212int serviceMain(int argc, char*argv[])
2213{
5124de27 2214 L.setName(s_programname);
18af64a8
BH
2215 L.setLoglevel((Logger::Urgency)(6)); // info and up
2216
2217 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
2218 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2219 if(val >= 0)
2220 theL().setFacility(val);
18af64a8
BH
2221 else
2222 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2223 }
2224
ba1a571d 2225 showProductVersion();
18af64a8 2226 seedRandom(::arg()["entropy-source"]);
06ea9015 2227 g_disthashseed=dns_random(0xffffffff);
2228
ad42489c 2229 loadRecursorLuaConfig(::arg()["lua-config-file"]);
2230
18af64a8 2231 parseACLs();
92011b8f 2232 sortPublicSuffixList();
2233
eb5bae86
BH
2234 if(!::arg()["dont-query"].empty()) {
2235 g_dontQuery=new NetmaskGroup;
2236 vector<string> ips;
2237 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
2238 ips.push_back("0.0.0.0");
2239 ips.push_back("::");
c36bc97a 2240
eb5bae86
BH
2241 L<<Logger::Warning<<"Will not send queries to: ";
2242 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2243 g_dontQuery->addMask(*i);
2244 if(i!=ips.begin())
4957a608 2245 L<<Logger::Warning<<", ";
eb5bae86
BH
2246 L<<Logger::Warning<<*i;
2247 }
2248 L<<Logger::Warning<<endl;
2249 }
2250
f7c1d4e3 2251 g_quiet=::arg().mustDo("quiet");
3ddb9247 2252
1bc3c142
BH
2253 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2254 if(g_weDistributeQueries) {
2255 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2256 }
3ddb9247 2257
756e82cf 2258 setupDelegationOnly();
b33c2462 2259 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 2260
12ce523e 2261 if(::arg()["dnssec"]=="off")
2262 g_dnssecmode=DNSSECMode::Off;
2263 else if(::arg()["dnssec"]=="process")
2264 g_dnssecmode=DNSSECMode::Process;
2265 else if(::arg()["dnssec"]=="validate")
2266 g_dnssecmode=DNSSECMode::ValidateAll;
2267 else if(::arg()["dnssec"]=="log-fail")
2268 g_dnssecmode=DNSSECMode::ValidateForLog;
2269 else {
2270 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2271 exit(1);
2272 }
2273
77499b05
BH
2274 if(::arg()["trace"]=="fail") {
2275 SyncRes::setDefaultLogMode(SyncRes::Store);
2276 }
2277 else if(::arg().mustDo("trace")) {
2278 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
2279 ::arg().set("quiet")="no";
2280 g_quiet=false;
2281 }
3ddb9247 2282
aadceba8 2283 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2284
4e9a20e6 2285 checkLinuxIPv6Limits();
5a38281c 2286 try {
3ddb9247 2287 vector<string> addrs;
5a38281c
BH
2288 if(!::arg()["query-local-address6"].empty()) {
2289 SyncRes::s_doIPv6=true;
d4fb76e9 2290 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3ddb9247 2291
5a38281c 2292 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
1dc8f4d0 2293 for(const string& addr : addrs) {
4957a608 2294 g_localQueryAddresses6.push_back(ComboAddress(addr));
5a38281c
BH
2295 }
2296 }
d4fb76e9
BH
2297 else {
2298 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2299 }
5a38281c
BH
2300 addrs.clear();
2301 stringtok(addrs, ::arg()["query-local-address"], ", ;");
1dc8f4d0 2302 for(const string& addr : addrs) {
5a38281c
BH
2303 g_localQueryAddresses4.push_back(ComboAddress(addr));
2304 }
2305 }
2306 catch(std::exception& e) {
2307 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2308 exit(99);
f7c1d4e3 2309 }
f555e92e 2310
1051f8a9
BH
2311 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2312
f7c1d4e3 2313 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
c3e753c7 2314 SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
1051f8a9
BH
2315 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2316 SyncRes::s_packetcacheservfailttl=::arg().asNum("packetcache-servfail-ttl");
628e2c7b
PA
2317 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2318 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 2319 SyncRes::s_serverID=::arg()["server-id"];
173d790e 2320 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 2321 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
01402d56 2322 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
2323 if(SyncRes::s_serverID.empty()) {
2324 char tmp[128];
2325 gethostname(tmp, sizeof(tmp)-1);
2326 SyncRes::s_serverID=tmp;
2327 }
3ddb9247 2328
5b0ddd18 2329 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 2330
49a699c4 2331 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 2332
08f3f638 2333 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 2334
f7c1d4e3 2335 g_logCommonErrors=::arg().mustDo("log-common-errors");
e661a20b
PD
2336
2337 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
2338 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2339
f7c1d4e3
BH
2340 makeUDPServerSockets();
2341 makeTCPServerSockets();
815099b2 2342
376effcf 2343 parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
2344
677e2a46
BH
2345 int forks;
2346 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
2347 if(!fork()) // we are child
2348 break;
2349 }
3ddb9247 2350
f7c1d4e3
BH
2351 if(::arg().mustDo("daemon")) {
2352 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
2353 L.toConsole(Logger::Critical);
f7c1d4e3
BH
2354 daemonize();
2355 }
2356 signal(SIGUSR1,usr1Handler);
2357 signal(SIGUSR2,usr2Handler);
2358 signal(SIGPIPE,SIG_IGN);
a6414fdc 2359 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
c0063e60 2360 g_numWorkerThreads = ::arg().asNum("threads");
a6414fdc
AT
2361 g_maxMThreads = ::arg().asNum("max-mthreads");
2362 checkOrFixFDS();
3ddb9247 2363
138435cb
BH
2364 int newgid=0;
2365 if(!::arg()["setgid"].empty())
2366 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
2367 int newuid=0;
2368 if(!::arg()["setuid"].empty())
2369 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
2370
f1d6a7ce
KM
2371 Utility::dropGroupPrivs(newuid, newgid);
2372
138435cb
BH
2373 if (!::arg()["chroot"].empty()) {
2374 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
2375 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
2376 exit(1);
2377 }
f0f3f0b0
PL
2378 else
2379 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
2380 }
2381
f0f3f0b0
PL
2382 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
2383 if(!s_pidfname.empty())
2384 unlink(s_pidfname.c_str()); // remove possible old pid file
2385 writePid();
2386
2387 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
2388
f1d6a7ce 2389 Utility::dropUserPrivs(newuid);
c0063e60 2390
49a699c4 2391 makeThreadPipes();
3ddb9247 2392
5d4dd7fe
BH
2393 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2394 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
343257a4 2395
c3828c03 2396 if(g_numThreads == 1) {
76698c6e 2397 L<<Logger::Warning<<"Operating unthreaded"<<endl;
76698c6e
BH
2398 recursorThread(0);
2399 }
2400 else {
2401 pthread_t tid;
c3828c03
BH
2402 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
2403 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 2404 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
76698c6e
BH
2405 }
2406 void* res;
49a699c4 2407
3ddb9247 2408
76698c6e 2409 pthread_join(tid, &res);
bb4bdbaf 2410 }
bb4bdbaf
BH
2411 return 0;
2412}
2413
2414void* recursorThread(void* ptr)
2415try
2416{
2e2cd8ec 2417 t_id=(int) (long) ptr;
49a699c4 2418 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
ac0e821b 2419 t_sstorage->domainmap = g_initialDomainMap;
49a699c4
BH
2420 t_allowFrom = g_initialAllowFrom;
2421 t_udpclientsocks = new UDPClientSocks();
bd0289fc 2422 t_tcpClientCounts = new tcpClientCounts_t();
49a699c4 2423 primeHints();
3ddb9247 2424
49a699c4 2425 t_packetCache = new RecursorPacketCache();
3ddb9247 2426
49a699c4 2427 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 2428
a3e7b735 2429 t_pdl = new shared_ptr<RecursorLua4>();
3ddb9247 2430
674cf0f6
BH
2431 try {
2432 if(!::arg()["lua-dns-script"].empty()) {
a3e7b735 2433 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(::arg()["lua-dns-script"]));
674cf0f6
BH
2434 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
2435 }
674cf0f6
BH
2436 }
2437 catch(std::exception &e) {
2438 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 2439 _exit(99);
674cf0f6 2440 }
3ddb9247 2441
77499b05 2442 t_traceRegex = new shared_ptr<Regex>();
f8f243b0 2443 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 2444 if(ringsize) {
60c8afa8 2445 t_remotes = new addrringbuf_t();
f8f243b0 2446 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 2447 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 2448 else
3ddb9247 2449 t_remotes->set_capacity(ringsize);
60c8afa8 2450 t_servfailremotes = new addrringbuf_t();
3ddb9247 2451 t_servfailremotes->set_capacity(ringsize);
60c8afa8 2452 t_largeanswerremotes = new addrringbuf_t();
3ddb9247 2453 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 2454
c5c066bf 2455 t_queryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2456 t_queryring->set_capacity(ringsize);
c5c066bf 2457 t_servfailqueryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2458 t_servfailqueryring->set_capacity(ringsize);
92011b8f 2459 }
3ddb9247 2460
bb4bdbaf 2461 MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
3ddb9247 2462
bb4bdbaf
BH
2463 PacketID pident;
2464
2465 t_fdm=getMultiplexer();
f3d1d67b 2466 if(!t_id) {
d07bf7ff 2467 if(::arg().mustDo("webserver")) {
30a1aa92 2468 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 2469 try {
1ce57618 2470 new RecursorWebServer(t_fdm);
8989097d
CH
2471 }
2472 catch(PDNSException &e) {
2473 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
2474 exit(99);
2475 }
f3d1d67b 2476 }
83252304 2477 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 2478 }
83252304 2479
49a699c4 2480 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 2481
1bc3c142 2482 if(!g_weDistributeQueries || !t_id) // if we distribute queries, only t_id = 0 listens
3ddb9247 2483 for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i)
1bc3c142 2484 t_fdm->addReadFD(i->first, i->second);
3ddb9247 2485
674cf0f6 2486 if(!t_id) {
674cf0f6
BH
2487 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
2488 }
1bc3c142 2489
f7c1d4e3 2490 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 2491
f7c1d4e3 2492 bool listenOnTCP(true);
49a699c4 2493
2c78bd57 2494 time_t last_carbon=0;
2495 time_t carbonInterval=::arg().asNum("carbon-interval");
cc59bce6 2496 counter=AtomicCounter(0); // used to periodically execute certain tasks
f7c1d4e3 2497 for(;;) {
ac0e821b 2498 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 2499
3427fa8a
BH
2500 if(!(counter%500)) {
2501 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
2502 }
2503
d2392145 2504 if(!(counter%55)) {
d8f6d49f 2505 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 2506 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 2507
f7c1d4e3 2508 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 2509 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 2510 if(g_logCommonErrors)
cd989c87 2511 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 2512 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
2513 }
2514 }
3ddb9247 2515
f7c1d4e3
BH
2516 counter++;
2517
3427fa8a 2518 if(!t_id && statsWanted) {
f7c1d4e3
BH
2519 doStats();
2520 }
2521
2522 Utility::gettimeofday(&g_now, 0);
2c78bd57 2523
2524 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
2525 MT->makeThread(doCarbonDump, 0);
2526 last_carbon = g_now.tv_sec;
2527 }
2528
bb4bdbaf 2529 t_fdm->run(&g_now);
3ea54bf0 2530 // 'run' updates g_now for us
f7c1d4e3 2531
b8ef5c5c 2532 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 2533 if(listenOnTCP) {
2534 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
2535 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2536 t_fdm->removeReadFD(*i);
2537 listenOnTCP=false;
2538 }
f7c1d4e3 2539 }
5c889cf5 2540 else {
2541 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
2542 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2543 t_fdm->addReadFD(*i, handleNewTCPQuestion);
2544 listenOnTCP=true;
2545 }
f7c1d4e3
BH
2546 }
2547 }
2548 }
2549}
3f81d239 2550catch(PDNSException &ae) {
bb4bdbaf
BH
2551 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2552 return 0;
2553}
2554catch(std::exception &e) {
2555 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2556 return 0;
2557}
2558catch(...) {
2559 L<<Logger::Error<<"any other exception in main: "<<endl;
2560 return 0;
2561}
2562
51e2144e 2563
3ddb9247 2564int main(int argc, char **argv)
288f4aa9 2565{
dbd23fc2
BH
2566 g_argc = argc;
2567 g_argv = argv;
5e3de507 2568 g_stats.startupTime=time(0);
3e135495 2569 versionSetProduct(ProductRecursor);
8a63d3ce 2570 reportBasicTypes();
0007c2e5 2571 reportOtherTypes();
ea634573 2572
22030c37 2573 int ret = EXIT_SUCCESS;
caa6eefa 2574
288f4aa9 2575 try {
f888311c 2576 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 2577 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 2578 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 2579 ::arg().set("local-port","port to listen on")="53";
32252594 2580 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 2581 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 2582 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
12ce523e 2583 ::arg().set("dnssec", "DNSSEC mode: off/process (default)/log-fail/validate")="process";
d3f809bf 2584 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 2585 ::arg().setSwitch("write-pid","Write a PID file")="yes";
34162f8f 2586 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="4";
0e9d9ce2 2587 ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2e3d8a19
BH
2588 ::arg().set("chroot","switch to chroot jail")="";
2589 ::arg().set("setgid","If set, change group id to this gid for more security")="";
2590 ::arg().set("setuid","If set, change user id to this uid for more security")="";
5b0ddd18 2591 ::arg().set("network-timeout", "Wait this nummer of milliseconds for network i/o")="1500";
bb4bdbaf 2592 ::arg().set("threads", "Launch this number of threads")="2";
1bc3c142 2593 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1";
5124de27 2594 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 2595 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976
CH
2596 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
2597 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
2598 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
2599 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
2600 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
2601 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
2602 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
69e7f117 2603 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
cc08b5a9 2604 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
2c78bd57 2605 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server")="";
2606 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
c038218b 2607 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 2608 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 2609 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
2610 ::arg().set("socket-owner","Owner of socket")="";
2611 ::arg().set("socket-group","Group of socket")="";
2612 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 2613
f0f3f0b0 2614 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
2615 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
2616 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 2617 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 2618 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 2619 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 2620 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 2621 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 2622 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 2623 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 2624 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 2625 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 2626 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 2627 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 2628 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 2629 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
7f7b8d55 2630 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
92011b8f 2631 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 2632 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 2633 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 2634 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 2635 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 2636 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 2637 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
0d5f0a9f 2638 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 2639 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 2640 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 2641 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 2642
5605c067 2643 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
2644 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
2645 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 2646 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 2647 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 2648 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
9bc8c14c 2649 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
4485aa35 2650 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 2651 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 2652 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
376effcf 2653 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="powerdns.com,82.94.213.34,2001:888:2000:1d::2";
966d3ba8 2654 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="";
cd6310a8 2655 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="no";
e661a20b 2656 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
a09a8ce0 2657 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 2658 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 2659 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 2660 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 2661 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
a09a8ce0 2662
68e6df3c 2663 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 2664 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2e3d8a19
BH
2665
2666 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 2667 ::arg().setCmd("version","Print version string");
d5141417 2668 ::arg().setCmd("config","Output blank configuration");
f27e6356 2669 L.toConsole(Logger::Info);
2e3d8a19 2670 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 2671
2d733c0f
CH
2672 string configname=::arg()["config-dir"]+"/recursor.conf";
2673 if(::arg()["config-name"]!="") {
2674 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 2675 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
2676 }
2677 cleanSlashes(configname);
5124de27 2678
577cf284
BH
2679 if(::arg().mustDo("config")) {
2680 cout<<::arg().configstring()<<endl;
2681 exit(0);
2682 }
2683
3ddb9247 2684 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
2685 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
2686
2e3d8a19 2687 ::arg().parse(argc,argv);
c836dc19 2688
f0f3f0b0
PL
2689 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
2690 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
2691 exit(EXIT_FAILURE);
2692 }
2693
2694 if (::arg()["socket-dir"].empty()) {
2695 if (::arg()["chroot"].empty())
2696 ::arg().set("socket-dir") = LOCALSTATEDIR;
2697 else
2698 ::arg().set("socket-dir") = "/";
2699 }
2700
2e3d8a19 2701 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 2702
61d74169 2703 if(::arg().asNum("threads")==1)
2704 ::arg().set("pdns-distributes-queries")="no";
2705
2e3d8a19 2706 if(::arg().mustDo("help")) {
ff5ba4f9
WA
2707 cout<<"syntax:"<<endl<<endl;
2708 cout<<::arg().helpstring(::arg()["help"])<<endl;
2709 exit(0);
b636533b 2710 }
5e3de507 2711 if(::arg().mustDo("version")) {
ba1a571d 2712 showProductVersion();
3613a51c 2713 showBuildConfiguration();
5e3de507
BH
2714 exit(99);
2715 }
b636533b 2716
34162f8f 2717 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 2718
34162f8f
CH
2719 if (logUrgency < Logger::Error)
2720 logUrgency = Logger::Error;
f48d7b65 2721 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
2722 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
2723 }
34162f8f
CH
2724 L.setLoglevel(logUrgency);
2725 L.toConsole(logUrgency);
2726
f7c1d4e3 2727 serviceMain(argc, argv);
288f4aa9 2728 }
3f81d239 2729 catch(PDNSException &ae) {
c836dc19 2730 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 2731 ret=EXIT_FAILURE;
288f4aa9 2732 }
fdbf35ac 2733 catch(std::exception &e) {
c836dc19 2734 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 2735 ret=EXIT_FAILURE;
288f4aa9
BH
2736 }
2737 catch(...) {
c836dc19 2738 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 2739 ret=EXIT_FAILURE;
288f4aa9 2740 }
3ddb9247 2741
22030c37 2742 return ret;
288f4aa9 2743}