]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
auth Install systemd unit file
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
32cb6fd4 3 Copyright (C) 2003 - 2016 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
3ddb9247 6 it under the terms of the GNU General Public License version 2
f28307ad 7 as published by the Free Software Foundation
288f4aa9 8
f782fe38
MH
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
288f4aa9
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
288f4aa9 21*/
caa6eefa 22
870a0fe4
AT
23#ifdef HAVE_CONFIG_H
24#include "config.h"
25#endif
3e61e7f7 26
76473b92
KM
27#include <netdb.h>
28#include <sys/stat.h>
29#include <unistd.h>
fa8fd4d2 30
2470b36e 31#include "ws-recursor.hh"
49a699c4 32#include <pthread.h>
3ea54bf0 33#include "recpacketcache.hh"
3ddb9247 34#include "utility.hh"
51e2144e 35#include "dns_random.hh"
288f4aa9
BH
36#include <iostream>
37#include <errno.h>
81859ba5 38#include <boost/static_assert.hpp>
288f4aa9
BH
39#include <map>
40#include <set>
97bb160b 41#include "recursor_cache.hh"
38c9ceaa 42#include "cachecleaner.hh"
288f4aa9 43#include <stdio.h>
c75a6a9e 44#include <signal.h>
288f4aa9 45#include <stdlib.h>
bb4bdbaf 46#include "misc.hh"
288f4aa9
BH
47#include "mtasker.hh"
48#include <utility>
288f4aa9
BH
49#include "arguments.hh"
50#include "syncres.hh"
88def049
BH
51#include <fcntl.h>
52#include <fstream>
3e61e7f7 53#include "sortlist.hh"
54extern SortList g_sortlist;
5c633640
BH
55#include "sstuff.hh"
56#include <boost/tuple/tuple.hpp>
57#include <boost/tuple/tuple_comparison.hpp>
72df400f 58#include <boost/shared_array.hpp>
7f1fa77d 59#include <boost/function.hpp>
5605c067 60#include <boost/algorithm/string.hpp>
8f7473d7 61#ifdef MALLOC_TRACE
62#include "malloctrace.hh"
63#endif
40a3dd64 64#include <netinet/tcp.h>
ea634573
BH
65#include "dnsparser.hh"
66#include "dnswriter.hh"
67#include "dnsrecords.hh"
f814d7c8 68#include "zoneparser-tng.hh"
1d5b3ce6 69#include "rec_channel.hh"
aaacf7f2 70#include "logger.hh"
c8ddb7c2 71#include "iputils.hh"
09e6702a 72#include "mplexer.hh"
c038218b 73#include "config.h"
808c5ef7 74#include "lua-recursor4.hh"
ba1a571d 75#include "version.hh"
79332bff 76#include "responsestats.hh"
d67620e4 77#include "secpoll-recursor.hh"
c5c066bf 78#include "dnsname.hh"
644dd1da 79#include "filterpo.hh"
80#include "rpzloader.hh"
b3f0ed10 81#include "validate-recursor.hh"
f3c18728 82#include "rec-lua-conf.hh"
5c3b5e7f 83#include "ednsoptions.hh"
f3c18728 84
aa7929a3
RG
85#ifdef HAVE_PROTOBUF
86#include <boost/uuid/uuid.hpp>
87#include <boost/uuid/uuid_generators.hpp>
aa7929a3
RG
88#include "dnsmessage.pb.h"
89#endif
90
a2bfc3ff
BH
91#ifndef RECURSOR
92#include "statbag.hh"
93StatBag S;
94#endif
f3c18728 95
bb4bdbaf 96__thread FDMultiplexer* t_fdm;
674cf0f6 97__thread unsigned int t_id;
09e6702a 98unsigned int g_maxTCPPerClient;
5b0ddd18 99unsigned int g_networkTimeoutMsec;
08f3f638 100uint64_t g_latencyStatSize;
09e6702a 101bool g_logCommonErrors;
e661a20b 102bool g_anyToTcp;
b33c2462 103uint16_t g_udpTruncationThreshold, g_outgoingEDNSBufsize;
a3e7b735 104__thread shared_ptr<RecursorLua4>* t_pdl;
b3adda56 105bool g_lowercaseOutgoing;
60c8afa8 106
107__thread addrringbuf_t* t_remotes, *t_servfailremotes, *t_largeanswerremotes;
108
c5c066bf 109__thread boost::circular_buffer<pair<DNSName, uint16_t> >* t_queryring, *t_servfailqueryring;
77499b05 110__thread shared_ptr<Regex>* t_traceRegex;
674cf0f6 111
aa7929a3
RG
112#ifdef HAVE_PROTOBUF
113__thread boost::uuids::random_generator* t_uuidGenerator;
114#endif
115
376effcf 116NetmaskGroup g_ednssubnets;
117SuffixMatchNode g_ednsdomains;
118
d7dae798
BH
119RecursorControlChannel s_rcc; // only active in thread 0
120
121// for communicating with our threads
// File descriptors of the pipes associated with one thread. Going by the
// member names there is one pipe per direction, each with a read and a write end.
struct ThreadPipeSet
{
  int writeToThread, readToThread;     // pipe "to" the thread
  int writeFromThread, readFromThread; // pipe "from" the thread
};
3ea54bf0 129
d7dae798 130vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
5c633640 131
d7dae798 132SyncRes::domainmap_t* g_initialDomainMap; // new threads needs this to be setup
49a699c4
BH
133
134#include "namespaces.hh"
3ea54bf0 135
49a699c4 136__thread MemRecursorCache* t_RC;
16beeaa4 137__thread RecursorPacketCache* t_packetCache;
1d5b3ce6
BH
138RecursorStats g_stats;
139bool g_quiet;
49a699c4 140
1bc3c142
BH
141bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
142
41942bb3 143__thread NetmaskGroup* t_allowFrom;
49a699c4
BH
144static NetmaskGroup* g_initialAllowFrom; // new thread needs to be setup with this
145
eb5bae86 146NetmaskGroup* g_dontQuery;
2d733c0f 147string s_programname="pdns_recursor";
49a699c4 148
40a3dd64
BH
149typedef vector<int> tcpListenSockets_t;
150tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
3159c9ef 151int g_tcpTimeout;
85c32340 152unsigned int g_maxMThreads;
183eb877 153__thread struct timeval g_now; // timestamp, updated (too) frequently
84433b79 154typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
155listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
cbc03320 156set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
18af64a8 157
d7dae798
BH
158__thread MT_t* MT; // the big MTasker
159
f8f243b0 160unsigned int g_numThreads, g_numWorkerThreads;
c3828c03 161
12cd44ee 162#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
163// Bad Nets taken from both:
3ddb9247 164// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 165// and
166// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
167// where such a network may not be considered a valid destination
168#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
169#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 170
d7dae798 171//! used to send information to a newborn mthread
ea634573 172struct DNSComboWriter {
3ddb9247 173 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now),
232f0877 174 d_tcp(false), d_socket(-1)
ea634573
BH
175 {}
176 MOADNSParser d_mdp;
00c9b8c1 177 void setRemote(const ComboAddress* sa)
ea634573 178 {
37d3f960 179 d_remote=*sa;
ea634573
BH
180 }
181
b71b60ee 182 void setLocal(const ComboAddress& sa)
183 {
184 d_local=sa;
185 }
186
187
ea634573
BH
188 void setSocket(int sock)
189 {
190 d_socket=sock;
191 }
a1754c6a
BH
192
193 string getRemote() const
194 {
37d3f960 195 return d_remote.toString();
a1754c6a
BH
196 }
197
c9e9e5e0 198 struct timeval d_now;
b71b60ee 199 ComboAddress d_remote, d_local;
aa7929a3
RG
200#ifdef HAVE_PROTOBUF
201 boost::uuids::uuid d_uuid;
e824728a 202 Netmask ednssubnet;
aa7929a3 203#endif
ea634573
BH
204 bool d_tcp;
205 int d_socket;
a82f68f0 206 int d_tag{0};
49a3500d 207 string d_query;
cd989c87 208 shared_ptr<TCPConnection> d_tcpConnection;
e8340d27 209 vector<pair<uint16_t, string> > d_ednsOpts;
ea634573
BH
210};
211
212
288f4aa9
BH
213ArgvMap &arg()
214{
215 static ArgvMap theArg;
216 return theArg;
217}
4ef015cd 218
09e6702a 219
d8f6d49f 220void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 221
50c81227 222// -1 is error, 0 is timeout, 1 is success
3ddb9247 223int asendtcp(const string& data, Socket* sock)
5c633640
BH
224{
225 PacketID pident;
226 pident.sock=sock;
227 pident.outMSG=data;
3ddb9247 228
bb4bdbaf 229 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 230 string packet;
5c633640 231
5b0ddd18 232 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 233
9170fbaf 234 if(!ret || ret==-1) { // timeout
bb4bdbaf 235 t_fdm->removeWriteFD(sock->getHandle());
5c633640 236 }
50c81227
BH
237 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
238 return -1;
239 }
9170fbaf 240 return ret;
5c633640
BH
241}
242
d8f6d49f 243void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 244
9170fbaf 245// -1 is error, 0 is timeout, 1 is success
a683e8bd 246int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 247{
50c81227 248 data.clear();
5c633640
BH
249 PacketID pident;
250 pident.sock=sock;
251 pident.inNeeded=len;
825fa717 252 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 253 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 254
bb4bdbaf 255 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 256 if(!ret || ret==-1) { // timeout
bb4bdbaf 257 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 258 }
50c81227
BH
259 else if(data.empty()) {// error, EOF or other
260 return -1;
261 }
262
9170fbaf 263 return ret;
288f4aa9
BH
264}
265
fba1e944 266void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 267{
fba1e944 268 PacketID pident=*any_cast<PacketID>(&var);
4465e941 269 char resp[512];
a683e8bd 270 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
4465e941 271 t_fdm->removeReadFD(fd);
272 if(ret >= 0) {
a683e8bd 273 string data(resp, (size_t) ret);
fba1e944 274 MT->sendEvent(pident, &data);
4465e941 275 }
276 else {
fba1e944 277 string empty;
278 MT->sendEvent(pident, &empty);
279 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 280 }
281}
fba1e944 282string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 283{
4465e941 284 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
285 s.setNonBlocking();
286 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
287
288 s.bind(local);
289 s.connect(dest);
4465e941 290 s.send(query);
291
292 PacketID pident;
293 pident.sock=&s;
294 pident.type=0;
fba1e944 295 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 296
297 string data;
fba1e944 298
4465e941 299 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 300
4465e941 301 if(!ret || ret==-1) { // timeout
4465e941 302 t_fdm->removeReadFD(s.getHandle());
303 }
304 else if(data.empty()) {// error, EOF or other
fba1e944 305 // we could special case this
4465e941 306 return data;
307 }
4465e941 308 return data;
309}
310
311
3ddb9247 312vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
046c5a5d 313const ComboAddress g_local4("0.0.0.0"), g_local6("::");
1652a63e 314
d7dae798 315//! pick a random query local address
1652a63e 316ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 317{
1652a63e 318 ComboAddress ret;
5a38281c 319 if(family==AF_INET) {
3ddb9247 320 if(g_localQueryAddresses4.empty())
1652a63e 321 ret = g_local4;
3ddb9247 322 else
1652a63e
BH
323 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
324 ret.sin4.sin_port = htons(port);
5a38281c
BH
325 }
326 else {
327 if(g_localQueryAddresses6.empty())
1652a63e
BH
328 ret = g_local6;
329 else
330 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 331
1652a63e 332 ret.sin6.sin6_port = htons(port);
5a38281c 333 }
1652a63e 334 return ret;
5a38281c 335}
4ef015cd 336
d8f6d49f 337void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 338
d7dae798
BH
339void setSocketBuffer(int fd, int optname, uint32_t size)
340{
341 uint32_t psize=0;
342 socklen_t len=sizeof(psize);
3ddb9247 343
d7dae798
BH
344 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
345 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 346 return;
d7dae798
BH
347 }
348
349 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
c057bfaa 350 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
351}
352
353
354static void setSocketReceiveBuffer(int fd, uint32_t size)
355{
356 setSocketBuffer(fd, SO_RCVBUF, size);
357}
358
359static void setSocketSendBuffer(int fd, uint32_t size)
360{
361 setSocketBuffer(fd, SO_SNDBUF, size);
362}
363
364
4ef015cd
BH
365// you can ask this class for a UDP socket to send a query from
366// this socket is not yours, don't even think about deleting it
367// but after you call 'returnSocket' on it, don't assume anything anymore
368class UDPClientSocks
369{
4ef015cd 370 unsigned int d_numsocks;
4ef015cd 371public:
e2642526 372 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
373 {
374 }
375
996c89cc 376 typedef set<int> socks_t;
4ef015cd
BH
377 socks_t d_socks;
378
2ee280cf 379 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 380 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 381 {
d8f6d49f
BH
382 *fd=makeClientSocket(toaddr.sin4.sin_family);
383 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 384 return -2;
d8f6d49f
BH
385
386 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
387 int err = errno;
41ff43f8 388 // returnSocket(*fd);
3897b9e1 389 closesocket(*fd);
d8f6d49f 390 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 391 return -2;
998a4334 392 return -1;
d8f6d49f 393 }
998a4334 394
d8f6d49f 395 d_socks.insert(*fd);
998a4334 396 d_numsocks++;
d8f6d49f 397 return 0;
4ef015cd
BH
398 }
399
095c3045
BH
400 void returnSocket(int fd)
401 {
402 socks_t::iterator i=d_socks.find(fd);
34801ab1 403 if(i==d_socks.end()) {
335da0ba 404 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 405 }
bb4bdbaf 406 returnSocketLocked(i);
095c3045
BH
407 }
408
4ef015cd 409 // return a socket to the pool, or simply erase it
bb4bdbaf 410 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 411 {
600fc20b 412 if(i==d_socks.end()) {
3f81d239 413 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 414 }
80baf329 415 try {
bb4bdbaf 416 t_fdm->removeReadFD(*i);
80baf329
BH
417 }
418 catch(FDMultiplexerException& e) {
bb4bdbaf 419 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 420 }
3897b9e1 421 closesocket(*i);
3ddb9247 422
998a4334
BH
423 d_socks.erase(i++);
424 --d_numsocks;
4ef015cd 425 }
d8f6d49f
BH
426
427 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 428 static int makeClientSocket(int family)
d8f6d49f 429 {
a683e8bd 430 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 431
d8f6d49f
BH
432 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
433 return ret;
3ddb9247
PD
434
435 if(ret<0)
335da0ba 436 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 437
7eb73ffa 438 // setCloseOnExec(ret); // we're not going to exec
5a38281c 439
d8f6d49f 440 int tries=10;
3aa91c3e 441 ComboAddress sin;
d8f6d49f 442 while(--tries) {
1652a63e 443 uint16_t port;
3ddb9247 444
d8f6d49f 445 if(tries==1) // fall back to kernel 'random'
4957a608 446 port = 0;
1652a63e
BH
447 else
448 port = 1025 + dns_random(64510);
5a38281c 449
3aa91c3e 450 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 451
3ddb9247 452 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 453 break;
d8f6d49f
BH
454 }
455 if(!tries)
3aa91c3e 456 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 457
3897b9e1 458 setNonBlocking(ret);
d8f6d49f
BH
459 return ret;
460 }
49a699c4
BH
461};
462
463static __thread UDPClientSocks* t_udpclientsocks;
4ef015cd 464
288f4aa9 465/* these two functions are used by LWRes */
34801ab1 466// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 467int asendto(const char *data, size_t len, int flags,
3ddb9247 468 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 469{
34801ab1
BH
470
471 PacketID pident;
787e5eab
BH
472 pident.domain = domain;
473 pident.remote = toaddr;
474 pident.type = qtype;
34801ab1
BH
475
476 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
477 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
478
479 for(; chain.first != chain.second; chain.first++) {
480 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 481 /*
4665c31e
BH
482 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
483 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 484 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 485 */
34801ab1
BH
486 chain.first->key.chain.insert(id); // we can chain
487 *fd=-1; // gets used in waitEvent / sendEvent later on
488 return 1;
489 }
490 }
491
49a699c4 492 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
493 if(ret < 0)
494 return ret;
34801ab1 495
998a4334
BH
496 pident.fd=*fd;
497 pident.id=id;
3ddb9247 498
bb4bdbaf
BH
499 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
500 ret = send(*fd, data, len, 0);
501
5b0ddd18 502 int tmp = errno;
bb4bdbaf 503
7302ed0a 504 if(ret < 0)
49a699c4 505 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 506
5b0ddd18 507 errno = tmp; // this is for logging purposes only
7302ed0a 508 return ret;
288f4aa9
BH
509}
510
9170fbaf 511// -1 is error, 0 is timeout, 1 is success
a683e8bd 512int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 513 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 514{
0d5f0a9f 515 static optional<unsigned int> nearMissLimit;
3ddb9247 516 if(!nearMissLimit)
0d5f0a9f
BH
517 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
518
288f4aa9 519 PacketID pident;
4ef015cd 520 pident.fd=fd;
288f4aa9 521 pident.id=id;
0d5f0a9f 522 pident.domain=domain;
787e5eab 523 pident.type = qtype;
996c89cc 524 pident.remote=fromaddr;
b636533b 525
288f4aa9 526 string packet;
5b0ddd18 527 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 528
9170fbaf 529 if(ret > 0) {
996c89cc 530 if(packet.empty()) // means "error"
3ddb9247 531 return -1;
998a4334 532
a683e8bd 533 *d_len=packet.size();
9170fbaf 534 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 535 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 536 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 537 g_stats.spoofCount++;
35ce8576
BH
538 return -1;
539 }
288f4aa9 540 }
09e6702a 541 else {
34801ab1 542 if(fd >= 0)
49a699c4 543 t_udpclientsocks->returnSocket(fd);
09e6702a 544 }
9170fbaf 545 return ret;
288f4aa9
BH
546}
547
aa4e4cbf 548
87a5ea63 549string s_pidfname;
88def049
BH
550static void writePid(void)
551{
191f2e47 552 if(!::arg().mustDo("write-pid"))
553 return;
18e7758c 554 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 555 if(of)
705f31ae 556 of<< Utility::getpid() <<endl;
88def049 557 else
c057bfaa 558 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
559}
560
bd0289fc
BH
561typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
562tcpClientCounts_t __thread* t_tcpClientCounts;
0e9d9ce2 563
cd989c87 564TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
565{
566 ++s_currentConnections;
cd989c87 567 (*t_tcpClientCounts)[d_remote]++;
0e408828 568}
cd989c87
BH
569
570TCPConnection::~TCPConnection()
0e408828 571{
3ddb9247 572 if(closesocket(d_fd) < 0)
cd989c87 573 unixDie("closing socket for TCPConnection");
3ddb9247 574 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 575 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 576 --s_currentConnections;
0e408828 577}
0e9d9ce2 578
3ddb9247 579AtomicCounter TCPConnection::s_currentConnections;
d8f6d49f 580void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 581
92011b8f 582// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
c5c066bf 583void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 584{
92011b8f 585 if(packetsize > 1000 && t_largeanswerremotes)
586 t_largeanswerremotes->push_back(remote);
2cc13433
BH
587 switch(res) {
588 case RCode::ServFail:
92011b8f 589 if(t_servfailremotes) {
590 t_servfailremotes->push_back(remote);
591 if(query) // packet cache
592 t_servfailqueryring->push_back(make_pair(*query, qtype));
593 }
2cc13433
BH
594 g_stats.servFails++;
595 break;
596 case RCode::NXDomain:
597 g_stats.nxDomains++;
598 break;
599 case RCode::NoError:
600 g_stats.noErrors++;
601 break;
602 }
603}
604
a903b39c 605static string makeLoginfo(DNSComboWriter* dc)
606try
607{
c5c066bf 608 return "("+dc->d_mdp.d_qname.toString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
a903b39c 609}
610catch(...)
611{
612 return "Exception making error message for exception";
613}
614
aa7929a3
RG
615#ifdef HAVE_PROTOBUF
616static void protobufFillMessageFromDC(PBDNSMessage& message, const DNSComboWriter* dc)
617{
ec469dd7
RG
618 std::string* messageId = message.mutable_messageid();
619 messageId->resize(dc->d_uuid.size());
620 std::copy(dc->d_uuid.begin(), dc->d_uuid.end(), messageId->begin());
621
aa7929a3
RG
622 message.set_socketfamily(dc->d_remote.sin4.sin_family == AF_INET ? PBDNSMessage_SocketFamily_INET : PBDNSMessage_SocketFamily_INET6);
623 message.set_socketprotocol(dc->d_tcp ? PBDNSMessage_SocketProtocol_TCP : PBDNSMessage_SocketProtocol_UDP);
624 if (dc->d_local.sin4.sin_family == AF_INET) {
625 message.set_to(&dc->d_local.sin4.sin_addr.s_addr, sizeof(dc->d_local.sin4.sin_addr.s_addr));
626 }
627 else if (dc->d_local.sin4.sin_family == AF_INET6) {
628 message.set_to(&dc->d_local.sin6.sin6_addr.s6_addr, sizeof(dc->d_local.sin6.sin6_addr.s6_addr));
629 }
630 if (dc->d_remote.sin4.sin_family == AF_INET) {
631 message.set_from(&dc->d_remote.sin4.sin_addr.s_addr, sizeof(dc->d_remote.sin4.sin_addr.s_addr));
632 }
633 else if (dc->d_remote.sin4.sin_family == AF_INET6) {
634 message.set_from(&dc->d_remote.sin6.sin6_addr.s6_addr, sizeof(dc->d_remote.sin6.sin6_addr.s6_addr));
635 }
e824728a
RG
636 if (!dc->ednssubnet.empty()) {
637 const ComboAddress ca = dc->ednssubnet.getNetwork();
638 if (ca.sin4.sin_family == AF_INET) {
639 message.set_originalrequestorsubnet(&ca.sin4.sin_addr.s_addr, sizeof(ca.sin4.sin_addr.s_addr));
640 }
641 else if (ca.sin4.sin_family == AF_INET6) {
642 message.set_originalrequestorsubnet(&ca.sin6.sin6_addr.s6_addr, sizeof(ca.sin6.sin6_addr.s6_addr));
643 }
644 }
645
aa7929a3
RG
646 struct timespec ts;
647 clock_gettime(CLOCK_REALTIME, &ts);
648 message.set_timesec(ts.tv_sec);
649 message.set_timeusec(ts.tv_nsec / 1000);
650 message.set_id(ntohs(dc->d_mdp.d_header.id));
ec469dd7
RG
651
652 PBDNSMessage_DNSQuestion* question = message.mutable_question();
653 question->set_qname(dc->d_mdp.d_qname.toString());
654 question->set_qtype(dc->d_mdp.d_qtype);
655 question->set_qclass(dc->d_mdp.d_qclass);
aa7929a3
RG
656}
657
658static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, const DNSComboWriter* dc)
659{
660 PBDNSMessage message;
661 message.set_type(PBDNSMessage_Type_DNSQueryType);
662 message.set_inbytes(dc->d_query.length());
663 protobufFillMessageFromDC(message, dc);
664
50e8601e 665// cerr <<message.DebugString()<<endl;
aa7929a3
RG
666 std::string str;
667 message.SerializeToString(&str);
668 logger->queueData(str);
669 message.release_question();
670}
671
672static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const DNSComboWriter* dc, size_t responseSize, PBDNSMessage_DNSResponse& protobufResponse)
673{
674 PBDNSMessage message;
675 message.set_type(PBDNSMessage_Type_DNSResponseType);
676 message.set_inbytes(responseSize);
677 protobufFillMessageFromDC(message, dc);
678
679 message.set_allocated_response(&protobufResponse);
680
50e8601e 681// cerr <<message.DebugString()<<endl;
aa7929a3
RG
682 std::string str;
683 message.SerializeToString(&str);
684 logger->queueData(str);
685 message.release_response();
686}
687#endif
688
288f4aa9
BH
689void startDoResolve(void *p)
690{
7b1469bb 691 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 692 try {
92011b8f 693 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
694
b18ace73 695 uint32_t maxanswersize= dc->d_tcp ? 65535 : min((uint16_t) 512, g_udpTruncationThreshold);
7f7b8d55 696 EDNSOpts edo;
8e079f3a 697 bool haveEDNS=false;
698 if(getEDNSOpts(dc->d_mdp, &edo)) {
699 if(!dc->d_tcp)
700 maxanswersize = min(edo.d_packetsize, g_udpTruncationThreshold);
e8340d27 701 dc->d_ednsOpts = edo.d_options;
8e079f3a 702 haveEDNS=true;
10321a98 703 }
e325f20c 704 vector<DNSRecord> ret;
ea634573 705 vector<uint8_t> packet;
b23b8614 706
ad42489c 707 auto luaconfsLocal = g_luaconfs.getLocal();
83971888 708 std::string appliedPolicy;
aa7929a3
RG
709#ifdef HAVE_PROTOBUF
710 PBDNSMessage_DNSResponse protobufResponse;
711 if(luaconfsLocal->protobufServer) {
712 protobufLogQuery(luaconfsLocal->protobufServer, dc);
713 }
714#endif
ad42489c 715
3ddb9247 716 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
717
718 pw.getHeader()->aa=0;
719 pw.getHeader()->ra=1;
c154c8a4 720 pw.getHeader()->qr=1;
bb4bdbaf 721 pw.getHeader()->tc=0;
ea634573 722 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 723 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 724 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 725
1059837e 726 // DO NOT MOVE THIS CODE UP - DNSPacketWriter needs to get the original-cased version
b3adda56
PD
727 if (g_lowercaseOutgoing)
728 dc->d_mdp.d_qname = DNSName(toLower(dc->d_mdp.d_qname.toString()));
729
904d3219
PD
730 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
731
732 SyncRes sr(dc->d_now);
2e921ec6 733 bool DNSSECOK=false;
3457a2a0 734 if(t_pdl) {
735 sr.setLuaEngine(*t_pdl);
4ea94941 736 sr.d_requestor=dc->d_remote;
3457a2a0 737 }
2e921ec6 738
9eec8c98 739 if(g_dnssecmode != DNSSECMode::Off) {
2e921ec6 740 sr.d_doDNSSEC=true;
9eec8c98
PL
741
742 // Does the requestor want DNSSEC records?
743 if(edo.d_Z & EDNSOpts::DNSSECOK) {
744 DNSSECOK=true;
745 g_stats.dnssecQueries++;
746 }
747 } else {
748 // Ignore the client-set CD flag
749 pw.getHeader()->cd=0;
5b9853c9 750 }
57769f13 751
904d3219
PD
752 bool tracedQuery=false; // we could consider letting Lua know about this too
753 bool variableAnswer = false;
754
56b4d21b 755 int res;
39ec5d29 756 DNSFilterEngine::Policy dfepol;
757 DNSRecord spoofed;
e661a20b 758 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
759 pw.getHeader()->tc = 1;
760 res = 0;
761 variableAnswer = true;
e661a20b
PD
762 goto sendit;
763 }
764
c5c066bf 765 if(t_traceRegex->get() && (*t_traceRegex)->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
766 sr.setLogMode(SyncRes::Store);
767 tracedQuery=true;
768 }
3ddb9247 769
8f7473d7 770
976ec823 771 if(!g_quiet || tracedQuery) {
461df9d2 772 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 773 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
969cbf08 774#ifdef HAVE_PROTOBUF
976ec823 775 if(!dc->ednssubnet.empty()) {
776 L<<" (ecs "<<dc->ednssubnet.toString()<<")";
777 }
969cbf08 778#endif
976ec823 779 L<<endl;
780 }
c75a6a9e 781
fededf47 782 sr.setId(MT->getTid());
67828389 783 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
784 sr.setCacheOnly();
785
84433b79 786
3ddb9247 787 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
e325f20c 788
ad42489c 789 dfepol = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote);
39ec5d29 790
791 switch(dfepol.d_kind) {
792 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 793 break;
39ec5d29 794 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 795 g_stats.policyDrops++;
796 delete dc;
797 dc=0;
798 return;
39ec5d29 799 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 800 res=RCode::NXDomain;
83971888 801 appliedPolicy=dfepol.d_name;
644dd1da 802 goto haveAnswer;
803
39ec5d29 804 case DNSFilterEngine::PolicyKind::NODATA:
805 res=RCode::NoError;
83971888 806 appliedPolicy=dfepol.d_name;
39ec5d29 807 goto haveAnswer;
808
809 case DNSFilterEngine::PolicyKind::Custom:
644dd1da 810 res=RCode::NoError;
39ec5d29 811 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 812 spoofed.d_type=dfepol.d_custom->getType();
3876ee44 813 spoofed.d_ttl = dfepol.d_ttl;
39ec5d29 814 spoofed.d_class = 1;
815 spoofed.d_content = dfepol.d_custom;
589ad24b 816 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 817 ret.push_back(spoofed);
83971888 818 appliedPolicy=dfepol.d_name;
644dd1da 819 goto haveAnswer;
820
39ec5d29 821
822 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 823 if(!dc->d_tcp) {
824 res=RCode::NoError;
825 pw.getHeader()->tc=1;
83971888 826 appliedPolicy=dfepol.d_name;
644dd1da 827 goto haveAnswer;
828 }
829 break;
830 }
831
808c5ef7 832
ba461517 833 if(!t_pdl->get() || !(*t_pdl)->preresolve(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, dc->d_ednsOpts.empty() ? 0 : &dc->d_ednsOpts, dc->d_tag, res, &variableAnswer)) {
44971ca0
PD
834 try {
835 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
836 }
837 catch(ImmediateServFailException &e) {
854d44e3 838 if(g_logCommonErrors)
839 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
840 res = RCode::ServFail;
841 }
4485aa35 842
ad42489c 843 dfepol = luaconfsLocal->dfe.getPostPolicy(ret);
39ec5d29 844 switch(dfepol.d_kind) {
845 case DNSFilterEngine::PolicyKind::NoAction:
644dd1da 846 break;
39ec5d29 847 case DNSFilterEngine::PolicyKind::Drop:
644dd1da 848 g_stats.policyDrops++;
849 delete dc;
850 dc=0;
851 return;
39ec5d29 852 case DNSFilterEngine::PolicyKind::NXDOMAIN:
644dd1da 853 ret.clear();
854 res=RCode::NXDomain;
83971888 855 appliedPolicy=dfepol.d_name;
644dd1da 856 goto haveAnswer;
857
39ec5d29 858 case DNSFilterEngine::PolicyKind::NODATA:
644dd1da 859 ret.clear();
860 res=RCode::NoError;
83971888 861 appliedPolicy=dfepol.d_name;
644dd1da 862 goto haveAnswer;
863
39ec5d29 864 case DNSFilterEngine::PolicyKind::Truncate:
644dd1da 865 if(!dc->d_tcp) {
866 ret.clear();
867 res=RCode::NoError;
868 pw.getHeader()->tc=1;
83971888 869 appliedPolicy=dfepol.d_name;
644dd1da 870 goto haveAnswer;
871 }
872 break;
39ec5d29 873
874 case DNSFilterEngine::PolicyKind::Custom:
ad42489c 875 ret.clear();
39ec5d29 876 res=RCode::NoError;
877 spoofed.d_name=dc->d_mdp.d_qname;
5a1f298f 878 spoofed.d_type=dfepol.d_custom->getType();
3876ee44 879 spoofed.d_ttl = dfepol.d_ttl;
39ec5d29 880 spoofed.d_class = 1;
881 spoofed.d_content = dfepol.d_custom;
589ad24b 882 spoofed.d_place = DNSResourceRecord::ANSWER;
39ec5d29 883 ret.push_back(spoofed);
83971888 884 appliedPolicy=dfepol.d_name;
39ec5d29 885 goto haveAnswer;
644dd1da 886 }
a3e7b735 887
674cf0f6 888 if(t_pdl->get()) {
bd53ea9d 889 if(res == RCode::NoError) {
e325f20c 890 auto i=ret.cbegin();
891 for(; i!= ret.cend(); ++i)
e693ff5a 892 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
232f0877 893 break;
e325f20c 894 if(i == ret.cend())
a6147cd2 895 (*t_pdl)->nodata(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
a3e7b735 896 }
897 else if(res == RCode::NXDomain)
a6147cd2 898 (*t_pdl)->nxdomain(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
644dd1da 899
a3e7b735 900
a6147cd2 901 (*t_pdl)->postresolve(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res, &variableAnswer);
93f0da94 902
d2322a5e 903 }
4485aa35 904 }
644dd1da 905 haveAnswer:;
3e8216c8 906 if(res == PolicyDecision::DROP) {
e9c2ad3a 907 g_stats.policyDrops++;
ae7e77ad 908 delete dc;
909 dc=0;
910 return;
3ddb9247 911 }
3e8216c8 912 if(tracedQuery || res == PolicyDecision::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 913 {
85ffbc53
PD
914 string trace(sr.getTrace());
915 if(!trace.empty()) {
916 vector<string> lines;
917 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 918 for(const string& line : lines) {
85ffbc53
PD
919 if(!line.empty())
920 L<<Logger::Warning<< line << endl;
921 }
922 }
923 }
3ddb9247 924
b3f0ed10 925 if(res == PolicyDecision::PASS) { // XXX what does this MEAN? Why servfail on PASS?
0fe1d080
PD
926 pw.getHeader()->rcode=RCode::ServFail;
927 // no commit here, because no record
928 g_stats.servFails++;
929 }
288f4aa9 930 else {
ea634573 931 pw.getHeader()->rcode=res;
92011b8f 932
f3fe4ae6
PL
933 // Does the validation mode or query demand validation?
934 if(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || (dc->d_mdp.d_header.ad && g_dnssecmode==DNSSECMode::Process)) {
935 if(sr.doLog()) {
936 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<" for "<<dc->d_remote.toStringWithPort()<<endl;
937 }
938
939 auto state=validateRecords(ret);
940 if(state == Secure) {
2e921ec6 941 if(sr.doLog()) {
f3fe4ae6
PL
942 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
943 }
944
945 // Is the query source interested in the value of the ad-bit?
946 if (dc->d_mdp.d_header.ad)
947 pw.getHeader()->ad=1;
948 }
949 else if(state == Insecure) {
950 if(sr.doLog()) {
951 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
952 }
953
954 pw.getHeader()->ad=0;
955 }
956 else if(state == Bogus) {
957 if(sr.doLog()) {
958 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
2e921ec6 959 }
2e921ec6 960
f3fe4ae6
PL
961 // Does the query or validation mode sending out a SERVFAIL on validation errors?
962 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || (dc->d_mdp.d_header.ad && g_dnssecmode != DNSSECMode::Off))) {
2e921ec6 963 if(sr.doLog()) {
f3fe4ae6 964 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<" because recursor or query demands it for Bogus results"<<endl;
2e921ec6 965 }
2e921ec6 966
f3fe4ae6
PL
967 pw.getHeader()->rcode=RCode::ServFail;
968 goto sendit;
969 } else {
970 if(sr.doLog()) {
971 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<" Bogus validation since neither config nor query demands this"<<endl;
12ce523e 972 }
f3fe4ae6
PL
973 }
974 }
b3f0ed10 975 }
976
8e079f3a 977
978
c154c8a4 979 if(ret.size()) {
92476c8b 980 orderAndShuffle(ret);
ad42489c 981 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
3e61e7f7 982 sort(ret.begin(), ret.end(), *sl);
983 variableAnswer=true;
984 }
8e079f3a 985 }
986 if(haveEDNS) {
987 ret.push_back(makeOpt(edo.d_packetsize, 0, edo.d_Z));
988 }
989
990 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
2e921ec6 991 if(!DNSSECOK && (i->d_type == QType::RRSIG || i->d_type==QType::NSEC || i->d_type==QType::NSEC3))
992 continue;
8e079f3a 993 pw.startRecord(i->d_name, i->d_type, i->d_ttl, i->d_class, i->d_place);
994 if(i->d_type != QType::OPT) // their TTL ain't real
995 minTTL = min(minTTL, i->d_ttl);
996 i->d_content->toPacket(pw);
997 if(pw.size() > maxanswersize) {
998 pw.rollback();
999 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
add935a2 1000 {
4957a608 1001 pw.getHeader()->tc=1;
add935a2
PD
1002 pw.truncate();
1003 }
8e079f3a 1004 goto sendit; // need to jump over pw.commit
1005 }
aa7929a3
RG
1006#ifdef HAVE_PROTOBUF
1007 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA)) {
1008 PBDNSMessage_DNSResponse_DNSRR* pbRR = protobufResponse.add_rrs();
1009 if(pbRR) {
1010 pbRR->set_name(i->d_name.toString());
1011 pbRR->set_type(i->d_type);
1012 pbRR->set_class_(i->d_class);
1013 pbRR->set_ttl(i->d_ttl);
1014 if (i->d_type == QType::A) {
1015 const ARecordContent& arc = dynamic_cast<const ARecordContent&>(*(i->d_content));
1016 ComboAddress data = arc.getCA();
1017 pbRR->set_rdata(&data.sin4.sin_addr.s_addr, sizeof(data.sin4.sin_addr.s_addr));
1018 }
1019 else if (i->d_type == QType::AAAA) {
1020 const AAAARecordContent& arc = dynamic_cast<const AAAARecordContent&>(*(i->d_content));
1021 ComboAddress data = arc.getCA();
1022 pbRR->set_rdata(&data.sin6.sin6_addr.s6_addr, sizeof(data.sin6.sin6_addr.s6_addr));
1023 }
1024 }
1025 }
1026#endif
ea634573 1027 }
8e079f3a 1028 if(ret.size())
1029 pw.commit();
288f4aa9 1030 }
10321a98 1031 sendit:;
b3f0ed10 1032
79332bff 1033 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
92011b8f 1034 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
aa7929a3
RG
1035#ifdef HAVE_PROTOBUF
1036 if (luaconfsLocal->protobufServer) {
1037 protobufResponse.set_rcode(pw.getHeader()->rcode);
83971888
RG
1038 if (!appliedPolicy.empty()) {
1039 protobufResponse.set_appliedpolicy(appliedPolicy);
1040 }
aa7929a3
RG
1041 protobufLogResponse(luaconfsLocal->protobufServer, dc, packet.size(), protobufResponse);
1042 }
1043#endif
ea634573 1044 if(!dc->d_tcp) {
b71b60ee 1045 struct msghdr msgh;
1046 struct iovec iov;
1047 char cbuf[256];
1048 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
cbc03320 1049 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1050 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
cbc03320 1051 } else
579cae19 1052 msgh.msg_control=NULL;
cbc03320 1053 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1054 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
3762e821 1055 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
76e2b9e3 1056
1057 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_query,
1058 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1059 g_now.tv_sec,
76e2b9e3 1060 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1061 min(minTTL,SyncRes::s_packetcachettl));
1051f8a9 1062 }
3762e821 1063 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1064 }
9c495589
BH
1065 else {
1066 char buf[2];
ea634573
BH
1067 buf[0]=packet.size()/256;
1068 buf[1]=packet.size()%256;
feccc9fc 1069
c038218b 1070 Utility::iovec iov[2];
feccc9fc 1071
ea634573
BH
1072 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1073 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1074
c038218b 1075 int ret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1076 bool hadError=true;
feccc9fc 1077
3ddb9247 1078 if(ret == 0)
18af64a8 1079 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
3ddb9247 1080 else if(ret < 0 )
18af64a8 1081 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
ea634573 1082 else if((unsigned int)ret != 2 + packet.size())
18af64a8 1083 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
0e9d9ce2 1084 else
18af64a8 1085 hadError=false;
3ddb9247 1086
09e6702a 1087 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1088
09e6702a 1089 if(hadError) {
18af64a8 1090 // no need to remove us from FDM, we weren't there
c36bc97a 1091 dc->d_socket = -1;
09e6702a 1092 }
a6ae6414 1093 else {
cd989c87 1094 dc->d_tcpConnection->state=TCPConnection::BYTE0;
18af64a8 1095 Utility::gettimeofday(&g_now, 0); // needs to be updated
cd989c87
BH
1096 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1097 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
0e9d9ce2 1098 }
9c495589 1099 }
3ddb9247 1100
1d5b3ce6 1101 if(!g_quiet) {
461df9d2 1102 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 1103 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
9de3e034 1104 sr.d_totUsec/1000.0<<" ms, "<<
1105 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e 1106 }
b23b8614 1107
3ddb9247 1108 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
fe213470
BH
1109 float spent=makeFloat(sr.d_now-dc->d_now);
1110 if(spent < 0.001)
1111 g_stats.answers0_1++;
1112 else if(spent < 0.010)
1113 g_stats.answers1_10++;
1114 else if(spent < 0.1)
1115 g_stats.answers10_100++;
1116 else if(spent < 1.0)
1117 g_stats.answers100_1000++;
1118 else
1119 g_stats.answersSlow++;
1120
574af7ea 1121 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1122 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1123 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1124 // no worries, we do this for packet cache hits elsewhere
c6d04bdc 1125 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 1126 delete dc;
c36bc97a 1127 dc=0;
288f4aa9 1128 }
3f81d239 1129 catch(PDNSException &ae) {
a903b39c 1130 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 1131 delete dc;
288f4aa9 1132 }
7b1469bb 1133 catch(MOADNSException& e) {
a903b39c 1134 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 1135 delete dc;
7b1469bb 1136 }
fdbf35ac 1137 catch(std::exception& e) {
a903b39c 1138 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what()<<endl;
c36bc97a 1139 delete dc;
c154c8a4 1140 }
288f4aa9 1141 catch(...) {
a903b39c 1142 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1143 }
3ddb9247 1144
ec6eacbc 1145 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1146}
1147
677e2a46 1148void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1149{
2d733c0f 1150 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1151 if(processNum >= 0)
335da0ba 1152 sockname += "."+std::to_string(processNum);
677e2a46 1153 sockname+=".controlsocket";
41f7a068 1154 s_rcc.listen(sockname);
3ddb9247 1155
387de317
BH
1156 int sockowner = -1;
1157 int sockgroup = -1;
1158
1159 if (!::arg().isEmpty("socket-group"))
1160 sockgroup=::arg().asGid("socket-group");
1161 if (!::arg().isEmpty("socket-owner"))
1162 sockowner=::arg().asUid("socket-owner");
3ddb9247 1163
f838ad8d
BH
1164 if (sockgroup > -1 || sockowner > -1) {
1165 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1166 unixDie("Failed to chown control socket");
1167 }
1168 }
387de317
BH
1169
1170 // do mode change if socket-mode is given
1171 if(!::arg().isEmpty("socket-mode")) {
1172 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1173 if(chmod(sockname.c_str(), sockmode) < 0) {
1174 unixDie("Failed to chmod control socket");
1175 }
387de317 1176 }
1d5b3ce6
BH
1177}
1178
d8f6d49f 1179void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1180{
cd989c87 1181 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1182
879b3f70 1183 if(conn->state==TCPConnection::BYTE0) {
b841314c 1184 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 1185 if(bytes==1)
667f7e60 1186 conn->state=TCPConnection::BYTE1;
3ddb9247 1187 if(bytes==2) {
a0aa4f64 1188 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
1189 conn->bytesread=0;
1190 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1191 }
1192 if(!bytes || bytes < 0) {
bb4bdbaf 1193 t_fdm->removeReadFD(fd);
09e6702a
BH
1194 return;
1195 }
1196 }
667f7e60 1197 else if(conn->state==TCPConnection::BYTE1) {
b841314c 1198 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 1199 if(bytes==1) {
667f7e60 1200 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1201 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 1202 conn->bytesread=0;
09e6702a
BH
1203 }
1204 if(!bytes || bytes < 0) {
1205 if(g_logCommonErrors)
cd989c87 1206 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 1207 t_fdm->removeReadFD(fd);
09e6702a
BH
1208 return;
1209 }
1210 }
667f7e60 1211 else if(conn->state==TCPConnection::GETQUESTION) {
b841314c 1212 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
f9d67b41 1213 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
cd989c87 1214 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 1215 t_fdm->removeReadFD(fd);
09e6702a
BH
1216 return;
1217 }
b841314c 1218 conn->bytesread+=(uint16_t)bytes;
667f7e60 1219 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1220 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1221
09e6702a
BH
1222 DNSComboWriter* dc=0;
1223 try {
cd989c87 1224 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
1225 }
1226 catch(MOADNSException &mde) {
3ddb9247 1227 g_stats.clientParseError++;
4957a608 1228 if(g_logCommonErrors)
cd989c87 1229 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 1230 return;
09e6702a 1231 }
cd989c87
BH
1232 dc->d_tcpConnection = conn; // carry the torch
1233 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1234 dc->d_tcp=true;
cd989c87 1235 dc->setRemote(&conn->d_remote);
a6147cd2 1236 ComboAddress dest;
1237 memset(&dest, 0, sizeof(dest));
1238 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1239 socklen_t len = dest.getSocklen();
1240 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1241 dc->setLocal(dest);
aa7929a3
RG
1242#ifdef HAVE_PROTOBUF
1243 dc->d_uuid = (*t_uuidGenerator)();
1244#endif
879b3f70 1245 if(dc->d_mdp.d_header.qr) {
4957a608 1246 delete dc;
048f5db6 1247 g_stats.ignoredCount++;
4328f463 1248 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
4957a608 1249 return;
879b3f70 1250 }
3abcdab2
PD
1251 if(dc->d_mdp.d_header.opcode) {
1252 delete dc;
048f5db6 1253 g_stats.ignoredCount++;
4328f463 1254 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
3abcdab2
PD
1255 return;
1256 }
09e6702a 1257 else {
4957a608
BH
1258 ++g_stats.qcounter;
1259 ++g_stats.tcpqcounter;
50a5ef72 1260 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1261 return;
09e6702a
BH
1262 }
1263 }
1264 }
1265}
1266
6dcd28c3 1267//! Handle new incoming TCP connection
d8f6d49f 1268void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1269{
37d3f960 1270 ComboAddress addr;
09e6702a 1271 socklen_t addrlen=sizeof(addr);
a683e8bd 1272 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1273 if(newsock>=0) {
85c32340
BH
1274 if(MT->numProcesses() > g_maxMThreads) {
1275 g_stats.overCapacityDrops++;
3897b9e1 1276 closesocket(newsock);
85c32340
BH
1277 return;
1278 }
1279
92011b8f 1280 if(t_remotes)
1281 t_remotes->push_back(addr);
49a699c4 1282 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1283 if(!g_quiet)
4957a608 1284 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1285
09e6702a 1286 g_stats.unauthorizedTCP++;
3897b9e1 1287 closesocket(newsock);
09e6702a
BH
1288 return;
1289 }
bd0289fc 1290 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1291 g_stats.tcpClientOverflow++;
3897b9e1 1292 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
09e6702a
BH
1293 return;
1294 }
3ddb9247 1295
3897b9e1 1296 setNonBlocking(newsock);
cd989c87
BH
1297 shared_ptr<TCPConnection> tc(new TCPConnection(newsock, addr));
1298 tc->state=TCPConnection::BYTE0;
3ddb9247 1299
cd989c87 1300 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1301
0bff046b 1302 struct timeval now;
c038218b 1303 Utility::gettimeofday(&now, 0);
cd989c87 1304 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1305 }
1306}
3ddb9247 1307
55a1378f 1308void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, Netmask* ednssubnet)
1309{
1310 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1311 size_t questionLen = question.length();
1312 unsigned int consumed=0;
1313 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, 0, &consumed);
1314
1315 size_t pos= sizeof(dnsheader)+consumed+4;
1316 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1317 = 11 */
1318 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1319 /* OPT root label (1) followed by type (2) */
1320 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1321 char* ecsStart = nullptr;
1322 size_t ecsLen = 0;
1323 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1324 if (res == 0 && ecsLen > 4) {
1325 EDNSSubnetOpts eso;
1326 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1327 *ednssubnet=eso.source;
1328 }
1329 }
1330 }
1331 }
1332}
1333
b71b60ee 1334string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1335{
183eb877 1336 gettimeofday(&g_now, 0);
b71b60ee 1337 struct timeval diff = g_now - tv;
1338 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1339
22cf1fda 1340 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1341 g_stats.tooOldDrops++;
1342 return 0;
1343 }
1344
1bc3c142 1345 ++g_stats.qcounter;
d7f10541
BH
1346 if(fromaddr.sin4.sin_family==AF_INET6)
1347 g_stats.ipv6qcounter++;
1bc3c142
BH
1348
1349 string response;
93f0da94 1350 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 1351 unsigned int ctag=0;
12aff2e5
RG
1352 bool needECS = false;
1353#ifdef HAVE_PROTOBUF
1354 needECS = true;
1355#endif
e824728a 1356 Netmask ednssubnet;
1bc3c142
BH
1357 try {
1358 uint32_t age;
8f7473d7 1359#ifdef MALLOC_TRACE
1360 /*
1361 static uint64_t last=0;
1362 if(!last)
1363 g_mtracer->clearAllocators();
1364 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1365 last=g_mtracer->getAllocs();
1366 cout<<g_mtracer->topAllocatorsString()<<endl;
1367 g_mtracer->clearAllocators();
1368 */
1369#endif
55a1378f 1370
12aff2e5 1371 if(needECS || (t_pdl->get() && (*t_pdl)->d_gettag)) {
49a3500d 1372 uint16_t qtype=0;
b2eacd67 1373 try {
55a1378f 1374 DNSName qname;
55a1378f 1375 getQNameAndSubnet(question, &qname, &qtype, &ednssubnet);
12aff2e5
RG
1376
1377 if(t_pdl->get() && (*t_pdl)->d_gettag) {
1378 try {
1379 ctag=(*t_pdl)->gettag(fromaddr, ednssubnet, destaddr, qname, qtype);
1380 }
1381 catch(std::exception& e) {
1382 if(g_logCommonErrors)
1383 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1384 }
8ea8c302 1385 }
b2eacd67 1386 }
1387 catch(std::exception& e)
1388 {
1389 if(g_logCommonErrors)
1390 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 1391 }
12ce523e 1392 }
3ddb9247 1393
49a3500d 1394 if(!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age)) {
1395 if(!g_quiet)
1396 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
8f7473d7 1397
1bc3c142
BH
1398 g_stats.packetCacheHits++;
1399 SyncRes::s_queries++;
1400 ageDNSPacket(response, age);
b71b60ee 1401 struct msghdr msgh;
1402 struct iovec iov;
1403 char cbuf[256];
1404 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
cbc03320 1405 if(g_fromtosockets.count(fd)) {
fbe2a2e0 1406 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 1407 }
579cae19
PD
1408 else {
1409 msgh.msg_control=NULL;
1410 }
cbc03320 1411 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1412 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 1413
97bee66d
BH
1414 if(response.length() >= sizeof(struct dnsheader)) {
1415 struct dnsheader dh;
1416 memcpy(&dh, response.c_str(), sizeof(dh));
92011b8f 1417 updateResponseStats(dh.rcode, fromaddr, response.length(), 0, 0);
97bee66d 1418 }
08f3f638 1419 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1bc3c142
BH
1420 return 0;
1421 }
3ddb9247 1422 }
1bc3c142
BH
1423 catch(std::exception& e) {
1424 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1425 return 0;
1426 }
3ddb9247 1427
4ea94941 1428 if(t_pdl->get()) {
93f0da94 1429 if((*t_pdl)->ipfilter(fromaddr, destaddr, *dh)) {
4ea94941 1430 if(!g_quiet)
1431 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1432 g_stats.policyDrops++;
1433 return 0;
1434 }
1435 }
1436
1bc3c142 1437 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 1438 if(!g_quiet)
854d44e3 1439 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
461df9d2 1440
1bc3c142
BH
1441 g_stats.overCapacityDrops++;
1442 return 0;
1443 }
3ddb9247 1444
1bc3c142
BH
1445 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1446 dc->setSocket(fd);
49a3500d 1447 dc->d_tag=ctag;
1448 dc->d_query = question;
1bc3c142 1449 dc->setRemote(&fromaddr);
b71b60ee 1450 dc->setLocal(destaddr);
1bc3c142 1451 dc->d_tcp=false;
aa7929a3
RG
1452#ifdef HAVE_PROTOBUF
1453 dc->d_uuid = (*t_uuidGenerator)();
e824728a 1454 dc->ednssubnet = ednssubnet;
aa7929a3
RG
1455#endif
1456
1bc3c142
BH
1457 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1458 return 0;
3ddb9247
PD
1459}
1460
b71b60ee 1461
d8f6d49f 1462void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 1463{
a683e8bd 1464 ssize_t len;
5db529f8
BH
1465 char data[1500];
1466 ComboAddress fromaddr;
b71b60ee 1467 struct msghdr msgh;
1468 struct iovec iov;
1469 char cbuf[256];
1470
1471 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1472 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1473
3ddb9247 1474 for(;;)
b71b60ee 1475 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
92011b8f 1476 if(t_remotes)
1477 t_remotes->push_back(fromaddr);
b23b8614 1478
49a699c4 1479 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 1480 if(!g_quiet)
4957a608 1481 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1482
5db529f8 1483 g_stats.unauthorizedUDP++;
a9af3782 1484 return;
5db529f8 1485 }
15c01deb 1486 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 1487 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 1488 if(!g_quiet)
81859ba5 1489 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1490
1491 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1492 return;
1493 }
5db529f8 1494 try {
b23b8614 1495 dnsheader* dh=(dnsheader*)data;
3ddb9247 1496
b23b8614 1497 if(dh->qr) {
048f5db6 1498 g_stats.ignoredCount++;
4957a608
BH
1499 if(g_logCommonErrors)
1500 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 1501 }
3abcdab2 1502 else if(dh->opcode) {
048f5db6 1503 g_stats.ignoredCount++;
3abcdab2
PD
1504 if(g_logCommonErrors)
1505 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1506 }
5db529f8 1507 else {
a683e8bd 1508 string question(data, (size_t)len);
b71b60ee 1509 struct timeval tv={0,0};
1510 HarvestTimestamp(&msgh, &tv);
1511 ComboAddress dest;
1512 memset(&dest, 0, sizeof(dest)); // this makes sure we igore this address if not returned by recvmsg above
a6147cd2 1513 auto loc = rplookup(g_listenSocketsAddresses, fd);
1514 if(HarvestDestinationAddress(&msgh, &dest)) {
1515 // but.. need to get port too
1516 if(loc)
1517 dest.sin4.sin_port = loc->sin4.sin_port;
1518 }
1519 else {
1520 if(loc) {
1521 dest = *loc;
1522 }
1523 else {
1524 dest.sin4.sin_family = fromaddr.sin4.sin_family;
a683e8bd
RG
1525 socklen_t slen = dest.getSocklen();
1526 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
a6147cd2 1527 }
1528 }
232f0877 1529 if(g_weDistributeQueries)
b71b60ee 1530 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
232f0877 1531 else
b71b60ee 1532 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
5db529f8
BH
1533 }
1534 }
1535 catch(MOADNSException& mde) {
3ddb9247 1536 g_stats.clientParseError++;
84e66a59 1537 if(g_logCommonErrors)
4957a608 1538 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 1539 }
0b602819
KM
1540 catch(std::runtime_error& e) {
1541 g_stats.clientParseError++;
1542 if(g_logCommonErrors)
1543 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
1544 }
1545 }
ac0e821b
BH
1546 else {
1547 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
3ddb9247 1548 if(errno == EAGAIN)
9326cae1 1549 g_stats.noPacketError++;
bf3b0cec 1550 break;
ac0e821b 1551 }
5db529f8
BH
1552}
1553
1bc3c142 1554
5db529f8
BH
1555typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
1556deferredAdd_t deferredAdd;
1557
f28307ad 1558void makeTCPServerSockets()
9c495589 1559{
37d3f960 1560 int fd;
f28307ad 1561 vector<string>locals;
2e3d8a19 1562 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1563
f28307ad 1564 if(locals.empty())
3f81d239 1565 throw PDNSException("No local address specified");
3ddb9247 1566
f28307ad 1567 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1568 ServiceTuple st;
1569 st.port=::arg().asNum("local-port");
1570 parseService(*i, st);
3ddb9247 1571
32252594
BH
1572 ComboAddress sin;
1573
f28307ad 1574 memset((char *)&sin,0, sizeof(sin));
37d3f960 1575 sin.sin4.sin_family = AF_INET;
32252594 1576 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1577 sin.sin6.sin6_family = AF_INET6;
f71bc087 1578 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1579 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1580 }
1581
1582 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 1583 if(fd<0)
3f81d239 1584 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 1585
3897b9e1 1586 setCloseOnExec(fd);
a903b39c 1587
f28307ad 1588 int tmp=1;
37d3f960 1589 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
f28307ad 1590 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1591 exit(1);
f28307ad 1592 }
0dfa94ab 1593 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1594 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1595 }
1596
c8ddb7c2 1597#ifdef TCP_DEFER_ACCEPT
37d3f960
BH
1598 if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
1599 if(i==locals.begin())
4957a608 1600 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1601 }
1602#endif
1603
fec7dd5a
SS
1604 if( ::arg().mustDo("non-local-bind") )
1605 Utility::setBindAny(AF_INET, fd);
1606
2332f42d 1607#ifdef SO_REUSEPORT
1608 if(::arg().mustDo("reuseport")) {
1609 int one=1;
1610 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
1611 throw PDNSException("SO_REUSEPORT: "+stringerror());
1612 }
1613#endif
1614
32252594 1615 sin.sin4.sin_port = htons(st.port);
a683e8bd 1616 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1617 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1618 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 1619
3897b9e1 1620 setNonBlocking(fd);
49a699c4 1621 setSocketSendBuffer(fd, 65000);
37d3f960 1622 listen(fd, 128);
5db529f8 1623 deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1624 g_tcpListenSockets.push_back(fd);
84433b79 1625 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1626 // - fd is not that which we know here, but returned from accept()
3ddb9247 1627 if(sin.sin4.sin_family == AF_INET)
32252594 1628 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1629 else
32252594 1630 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1631 }
9c495589
BH
1632}
1633
f28307ad 1634void makeUDPServerSockets()
288f4aa9 1635{
fec7dd5a 1636 int one=1;
f28307ad 1637 vector<string>locals;
2e3d8a19 1638 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1639
f28307ad 1640 if(locals.empty())
3f81d239 1641 throw PDNSException("No local address specified");
3ddb9247 1642
f28307ad 1643 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1644 ServiceTuple st;
1645 st.port=::arg().asNum("local-port");
1646 parseService(*i, st);
1647
37d3f960 1648 ComboAddress sin;
996c89cc 1649
37d3f960
BH
1650 memset(&sin, 0, sizeof(sin));
1651 sin.sin4.sin_family = AF_INET;
32252594 1652 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1653 sin.sin6.sin6_family = AF_INET6;
f71bc087 1654 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1655 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 1656 }
3ddb9247 1657
bb4bdbaf 1658 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 1659 if(fd < 0) {
3f81d239 1660 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1661 }
915b0c39
AT
1662 if (!setSocketTimestamps(fd))
1663 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 1664
b71b60ee 1665 if(IsAnyAddress(sin)) {
cbc03320 1666 if(sin.sin4.sin_family == AF_INET)
1667 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
1668 g_fromtosockets.insert(fd);
757d3179 1669#ifdef IPV6_RECVPKTINFO
cbc03320 1670 if(sin.sin4.sin_family == AF_INET6)
1671 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
1672 g_fromtosockets.insert(fd);
757d3179 1673#endif
0dfa94ab 1674 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
1675 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1676 }
b71b60ee 1677 }
fec7dd5a
SS
1678 if( ::arg().mustDo("non-local-bind") )
1679 Utility::setBindAny(AF_INET6, fd);
1680
3897b9e1 1681 setCloseOnExec(fd);
a903b39c 1682
4e9a20e6 1683 setSocketReceiveBuffer(fd, 250000);
32252594 1684 sin.sin4.sin_port = htons(st.port);
37d3f960 1685
2332f42d 1686
1687#ifdef SO_REUSEPORT
1688 if(::arg().mustDo("reuseport")) {
1689 int one=1;
1690 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
1691 throw PDNSException("SO_REUSEPORT: "+stringerror());
1692 }
1693#endif
a683e8bd 1694 socklen_t socklen=sin.getSocklen();
3ddb9247 1695 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 1696 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 1697
3897b9e1 1698 setNonBlocking(fd);
c2136bf0 1699
0aaecd50 1700 deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 1701 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 1702 if(sin.sin4.sin_family == AF_INET)
32252594 1703 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1704 else
32252594 1705 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1706 }
c836dc19 1707}
caa6eefa 1708
9c495589 1709
c836dc19
BH
1710void daemonize(void)
1711{
1712 if(fork())
1713 exit(0); // bye bye
3ddb9247
PD
1714
1715 setsid();
c836dc19 1716
27a5ead5 1717 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 1718 if(i < 0)
27a5ead5
BH
1719 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1720 else {
1721 dup2(i,0); /* stdin */
1722 dup2(i,1); /* stderr */
1723 dup2(i,2); /* stderr */
1724 close(i);
1725 }
288f4aa9 1726}
caa6eefa 1727
cc59bce6 1728AtomicCounter counter;
c75a6a9e
BH
// Set by usr1Handler() to request a statistics dump; doStats() clears it.
bool statsWanted;

// Signal handler (presumably installed for SIGUSR1 - the registration is not
// in this part of the file): only raises the flag, the work happens later.
void usr1Handler(int)
{
  statsWanted = true;
}
ae1b2e98 1735
9170fbaf
BH
1736void usr2Handler(int)
1737{
f1f34cc2 1738 g_quiet= !g_quiet;
1739 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
1740 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
1741}
1742
c75a6a9e
BH
1743void doStats(void)
1744{
16beeaa4
BH
1745 static time_t lastOutputTime;
1746 static uint64_t lastQueryCount;
d299d4f5 1747
1748 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
1749 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 1750
d299d4f5 1751 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
bd301954 1752 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
1753 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
1754 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
1755 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
1756
bd301954 1757 L<<Logger::Notice<<"stats: throttle map: "
3427fa8a 1758 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 1759 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
bd301954
JB
1760 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1761 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 1762 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
bd301954 1763 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 1764 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 1765
bd301954 1766 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 1767 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 1768
bd301954 1769 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 1770 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 1771
16beeaa4
BH
1772 time_t now = time(0);
1773 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
bd301954 1774 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
1775 }
1776 lastOutputTime = now;
1777 lastQueryCount = SyncRes::s_queries;
c75a6a9e 1778 }
3ddb9247 1779 else if(statsWanted)
bd301954 1780 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 1781
c75a6a9e
BH
1782 statsWanted=false;
1783}
c836dc19 1784
29f0b1ce 1785static void houseKeeping(void *)
c836dc19 1786{
d67620e4 1787 static __thread time_t last_stat, last_rootupdate, last_prune, last_secpoll;
8baca3fa 1788 static __thread int cleanCounter=0;
cc59bce6 1789 static __thread bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
1790 try {
1791 if(s_running)
1792 return;
1793 s_running=true;
3ddb9247 1794
cc59bce6 1795 struct timeval now;
1796 Utility::gettimeofday(&now, 0);
3ddb9247
PD
1797
1798 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 1799 DTime dt;
1800 dt.setTimeval(now);
1801 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
f8f243b0 1802 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numWorkerThreads);
3ddb9247 1803
f8f243b0 1804 pruneCollection(t_sstorage->negcache, ::arg().asNum("max-cache-entries") / (g_numWorkerThreads * 10), 200);
3ddb9247 1805
cc59bce6 1806 if(!((cleanCounter++)%40)) { // this is a full scan!
1807 time_t limit=now.tv_sec-300;
1808 for(SyncRes::nsspeeds_t::iterator i = t_sstorage->nsSpeeds.begin() ; i!= t_sstorage->nsSpeeds.end(); )
1809 if(i->second.stale(limit))
1810 t_sstorage->nsSpeeds.erase(i++);
1811 else
1812 ++i;
1813 }
1814 last_prune=time(0);
d67620e4 1815 }
3ddb9247 1816
cc59bce6 1817 if(now.tv_sec - last_rootupdate > 7200) {
1818 SyncRes sr(now);
1819 sr.setDoEDNS0(true);
e325f20c 1820 vector<DNSRecord> ret;
3ddb9247 1821
cc59bce6 1822 sr.setNoCache();
1823 int res=-1;
18b73338 1824 try {
6ed9a611 1825 res=sr.beginResolve(DNSName("."), QType(QType::NS), 1, ret);
cc59bce6 1826 }
3aa91c3e 1827 catch(PDNSException& e)
1828 {
1829 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
1830 }
1831
1832 catch(std::exception& e)
1833 {
1834 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
1835 }
1836
cc59bce6 1837 catch(...)
1838 {
1839 L<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
1840 }
1841 if(!res) {
1842 L<<Logger::Notice<<"Refreshed . records"<<endl;
1843 last_rootupdate=now.tv_sec;
1844 }
1845 else
1846 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1847 }
3ddb9247 1848
cc59bce6 1849 if(!t_id) {
3ddb9247 1850 if(now.tv_sec - last_stat >= 1800) {
cc59bce6 1851 doStats();
1852 last_stat=time(0);
1853 }
3ddb9247 1854
cc59bce6 1855 if(now.tv_sec - last_secpoll >= 3600) {
1856 try {
1857 doSecPoll(&last_secpoll);
1858 }
1859 catch(...) {}
18b73338 1860 }
d67620e4 1861 }
cc59bce6 1862 s_running=false;
d67620e4 1863 }
cc59bce6 1864 catch(PDNSException& ae)
1865 {
1866 s_running=false;
1867 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
1868 throw;
1869 }
779828c4 1870}
d6d5dea7 1871
49a699c4
BH
1872void makeThreadPipes()
1873{
c3828c03 1874 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
1875 struct ThreadPipeSet tps;
1876 int fd[2];
1877 if(pipe(fd) < 0)
1878 unixDie("Creating pipe for inter-thread communications");
3ddb9247 1879
49a699c4
BH
1880 tps.readToThread = fd[0];
1881 tps.writeToThread = fd[1];
3ddb9247 1882
49a699c4
BH
1883 if(pipe(fd) < 0)
1884 unixDie("Creating pipe for inter-thread communications");
1885 tps.readFromThread = fd[0];
1886 tps.writeFromThread = fd[1];
3ddb9247 1887
49a699c4
BH
1888 g_pipes.push_back(tps);
1889 }
1890}
1891
00c9b8c1
BH
1892struct ThreadMSG
1893{
1894 pipefunc_t func;
1895 bool wantAnswer;
1896};
1897
49a699c4
BH
1898void broadcastFunction(const pipefunc_t& func, bool skipSelf)
1899{
49a699c4 1900 unsigned int n = 0;
1dc8f4d0 1901 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
1902 {
1903 if(n++ == t_id) {
1904 if(!skipSelf)
1905 func(); // don't write to ourselves!
1906 continue;
1907 }
3ddb9247 1908
00c9b8c1
BH
1909 ThreadMSG* tmsg = new ThreadMSG();
1910 tmsg->func = func;
1911 tmsg->wantAnswer = true;
b841314c
RG
1912 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
1913 delete tmsg;
49a699c4 1914 unixDie("write to thread pipe returned wrong size or error");
b841314c 1915 }
3ddb9247 1916
49a699c4
BH
1917 string* resp;
1918 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
1919 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 1920
49a699c4
BH
1921 if(resp) {
1922// cerr <<"got response: " << *resp << endl;
1923 delete resp;
1924 }
1925 }
1926}
06ea9015 1927
2fafb640 1928static uint32_t g_disthashseed;
8171ab83 1929void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 1930{
8171ab83 1931 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 1932 unsigned int target = 1 + (hash % (g_pipes.size()-1));
1933
00c9b8c1
BH
1934 if(target == t_id) {
1935 func();
1936 return;
1937 }
3ddb9247 1938 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
1939 ThreadMSG* tmsg = new ThreadMSG();
1940 tmsg->func = func;
1941 tmsg->wantAnswer = false;
3ddb9247 1942
b841314c
RG
1943 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
1944 delete tmsg;
3ddb9247 1945 unixDie("write to thread pipe returned wrong size or error");
b841314c 1946 }
00c9b8c1 1947}
3427fa8a 1948
49a699c4
BH
1949void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
1950{
00c9b8c1 1951 ThreadMSG* tmsg;
3ddb9247
PD
1952
1953 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
1954 unixDie("read from thread pipe returned wrong size or error");
1955 }
3ddb9247 1956
2f22827a 1957 void *resp=0;
1958 try {
1959 resp = tmsg->func();
1960 }
1961 catch(std::exception& e) {
6d2010a8 1962 if(g_logCommonErrors)
1963 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 1964 }
1965 catch(PDNSException& e) {
6d2010a8 1966 if(g_logCommonErrors)
1967 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 1968 }
00c9b8c1
BH
1969 if(tmsg->wantAnswer)
1970 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp))
1971 unixDie("write to thread pipe returned wrong size or error");
3ddb9247 1972
00c9b8c1 1973 delete tmsg;
49a699c4 1974}
09e6702a 1975
13034931
BH
1976template<class T> void *voider(const boost::function<T*()>& func)
1977{
1978 return func();
1979}
1980
b3b5459d
BH
1981vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
1982{
1983 a.insert(a.end(), b.begin(), b.end());
1984 return a;
1985}
1986
// Append every (string, number) pair in b to a and return a.
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  for(const pair<string, uint16_t>& entry : b)
    a.push_back(entry);
  return a;
}
1992
3ddb9247
PD
1993vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
1994{
1995 a.insert(a.end(), b.begin(), b.end());
1996 return a;
1997}
1998
92011b8f 1999
13034931 2000template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
2001{
2002 unsigned int n = 0;
2003 T ret=T();
1dc8f4d0 2004 for(ThreadPipeSet& tps : g_pipes)
3427fa8a
BH
2005 {
2006 if(n++ == t_id) {
2007 if(!skipSelf) {
2008 T* resp = (T*)func(); // don't write to ourselves!
2009 if(resp) {
2010 //~ cerr <<"got direct: " << *resp << endl;
2011 ret += *resp;
2012 delete resp;
2013 }
2014 }
2015 continue;
2016 }
3ddb9247 2017
00c9b8c1
BH
2018 ThreadMSG* tmsg = new ThreadMSG();
2019 tmsg->func = boost::bind(voider<T>, func);
2020 tmsg->wantAnswer = true;
3ddb9247 2021
b841314c
RG
2022 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2023 delete tmsg;
3427fa8a 2024 unixDie("write to thread pipe returned wrong size or error");
b841314c 2025 }
3ddb9247 2026
3427fa8a
BH
2027 T* resp;
2028 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2029 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2030
3427fa8a
BH
2031 if(resp) {
2032 //~ cerr <<"got response: " << *resp << endl;
2033 ret += *resp;
2034 delete resp;
2035 }
2036 }
2037 return ret;
2038}
2039
13034931
BH
2040template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2041template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
b3b5459d 2042template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
3ddb9247 2043template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 2044
d8f6d49f 2045void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2046{
2047 string remote;
2048 string msg=s_rcc.recv(&remote);
2049 RecursorControlParser rcp;
2050 RecursorControlParser::func_t* command;
3ddb9247 2051
09e6702a 2052 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2053
2054 // If we are inside a chroot, we need to strip
2055 if (!arg()["chroot"].empty()) {
a683e8bd 2056 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2057 remote = remote.substr(len);
2058 }
2059
ab5c053d
BH
2060 try {
2061 s_rcc.send(answer, &remote);
2062 command();
2063 }
fdbf35ac 2064 catch(std::exception& e) {
ab5c053d
BH
2065 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2066 }
3f81d239 2067 catch(PDNSException& ae) {
ab5c053d
BH
2068 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2069 }
09e6702a
BH
2070}
2071
d8f6d49f 2072void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2073{
0b18b22e 2074 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2075 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2076
667f7e60 2077 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2078
a683e8bd 2079 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2080 if(ret > 0) {
667f7e60 2081 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2082 pident->inNeeded-=(size_t)ret;
825fa717 2083 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2084 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2085 PacketID pid=*pident;
2086 string msg=pident->inMSG;
3ddb9247 2087
bb4bdbaf 2088 t_fdm->removeReadFD(fd);
3ddb9247 2089 MT->sendEvent(pid, &msg);
09e6702a
BH
2090 }
2091 else {
667f7e60 2092 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2093 }
2094 }
2095 else {
667f7e60 2096 PacketID tmp=*pident;
bb4bdbaf 2097 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2098 string empty;
2099 MT->sendEvent(tmp, &empty); // this conveys error status
2100 }
2101}
2102
d8f6d49f 2103void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2104{
0b18b22e 2105 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 2106 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 2107 if(ret > 0) {
a683e8bd 2108 pid->outPos+=(ssize_t)ret;
667f7e60
BH
2109 if(pid->outPos==pid->outMSG.size()) {
2110 PacketID tmp=*pid;
bb4bdbaf 2111 t_fdm->removeWriteFD(fd);
09e6702a
BH
2112 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2113 }
2114 }
2115 else { // error or EOF
667f7e60 2116 PacketID tmp(*pid);
bb4bdbaf 2117 t_fdm->removeWriteFD(fd);
09e6702a 2118 string sent;
998a4334 2119 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
2120 }
2121}
2122
34801ab1
BH
2123// resend event to everybody chained onto it
2124void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2125{
2126 if(iter->key.chain.empty())
2127 return;
e27e91a8 2128 // cerr<<"doResends called!\n";
34801ab1
BH
2129 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2130 resend.fd=-1;
2131 resend.id=*i;
e27e91a8 2132 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 2133
34801ab1
BH
2134 MT->sendEvent(resend, &content);
2135 g_stats.chainResends++;
34801ab1
BH
2136 }
2137}
2138
d8f6d49f 2139void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2140{
600fc20b 2141 PacketID pid=any_cast<PacketID>(var);
a683e8bd 2142 ssize_t len;
e45beeda 2143 char data[g_outgoingEDNSBufsize];
996c89cc 2144 ComboAddress fromaddr;
09e6702a
BH
2145 socklen_t addrlen=sizeof(fromaddr);
2146
998a4334 2147 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 2148
a683e8bd 2149 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 2150 if(len < 0)
996c89cc 2151 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 2152 else {
3ddb9247 2153 g_stats.serverParseError++;
09e6702a 2154 if(g_logCommonErrors)
85db02c5 2155 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 2156 ": packet smaller than DNS header"<<endl;
998a4334 2157 }
34801ab1 2158
49a699c4 2159 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
2160 string empty;
2161
2162 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 2163 if(iter != MT->d_waiters.end())
34801ab1 2164 doResends(iter, pid, empty);
3ddb9247 2165
34801ab1 2166 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 2167 return;
3ddb9247 2168 }
998a4334
BH
2169
2170 dnsheader dh;
2171 memcpy(&dh, data, sizeof(dh));
3ddb9247 2172
6da3b3ad
PD
2173 PacketID pident;
2174 pident.remote=fromaddr;
2175 pident.id=dh.id;
2176 pident.fd=fd;
34801ab1 2177
33a928af 2178 if(!dh.qr && g_logCommonErrors) {
854d44e3 2179 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
2180 }
2181
2182 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2183 !dh.qr) { // one weird server
2184 pident.domain.clear();
2185 pident.type = 0;
2186 }
2187 else {
2188 try {
0b31e67e 2189 if(len > 12)
2190 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
2191 }
2192 catch(std::exception& e) {
2193 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
0b31e67e 2194 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 2195 return;
34801ab1 2196 }
6da3b3ad
PD
2197 }
2198 string packet;
2199 packet.assign(data, len);
34801ab1 2200
6da3b3ad
PD
2201 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2202 if(iter != MT->d_waiters.end()) {
2203 doResends(iter, pident, packet);
2204 }
c1da7976 2205
6da3b3ad 2206retryWithName:
4957a608 2207
6da3b3ad
PD
2208 if(!MT->sendEvent(pident, &packet)) {
2209 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2210 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2211 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 2212 pident.domain == mthread->key.domain) {
6da3b3ad 2213 mthread->key.nearMisses++;
998a4334 2214 }
6da3b3ad
PD
2215
2216 // be a bit paranoid here since we're weakening our matching
3ddb9247 2217 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
2218 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2219 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2220 pident.domain = mthread->key.domain;
2221 pident.type = mthread->key.type;
2222 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 2223 }
09e6702a 2224 }
6da3b3ad
PD
2225 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2226 if(g_logCommonErrors) {
8a464ee3 2227 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 2228 }
09e6702a 2229 }
6da3b3ad
PD
2230 else if(fd >= 0) {
2231 t_udpclientsocks->returnSocket(fd);
2232 }
09e6702a
BH
2233}
2234
1f4abb20
BH
2235FDMultiplexer* getMultiplexer()
2236{
2237 FDMultiplexer* ret;
2238 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
2239 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
2240 try {
2241 ret=i->second();
1f4abb20
BH
2242 return ret;
2243 }
98d0ee4a 2244 catch(FDMultiplexerException &fe) {
0a7f24cb 2245 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
2246 }
2247 catch(...) {
2248 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2249 }
1f4abb20
BH
2250 }
2251 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2252 exit(1);
2253}
2254
3ddb9247 2255
0f39c1a3 2256string* doReloadLuaScript()
4485aa35 2257{
674cf0f6 2258 string fname= ::arg()["lua-dns-script"];
4485aa35 2259 try {
674cf0f6
BH
2260 if(fname.empty()) {
2261 t_pdl->reset();
2262 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 2263 return new string("unloaded\n");
4485aa35
BH
2264 }
2265 else {
a3e7b735 2266 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(fname));
4485aa35
BH
2267 }
2268 }
fdbf35ac 2269 catch(std::exception& e) {
674cf0f6 2270 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 2271 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 2272 }
3ddb9247 2273
674cf0f6 2274 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 2275 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
2276}
2277
49a699c4
BH
2278string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2279{
3ddb9247 2280 if(begin != end)
49a699c4 2281 ::arg().set("lua-dns-script") = *begin;
3ddb9247 2282
0f39c1a3 2283 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 2284}
49a699c4 2285
77499b05
BH
2286string* pleaseUseNewTraceRegex(const std::string& newRegex)
2287try
2288{
2289 if(newRegex.empty()) {
2290 t_traceRegex->reset();
2291 return new string("unset\n");
2292 }
2293 else {
2294 (*t_traceRegex) = shared_ptr<Regex>(new Regex(newRegex));
2295 return new string("ok\n");
2296 }
2297}
3f81d239 2298catch(PDNSException& ae)
77499b05
BH
2299{
2300 return new string(ae.reason+"\n");
2301}
2302
2303string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2304{
2305 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2306}
2307
4e9a20e6 2308static void checkLinuxIPv6Limits()
2309{
2310#ifdef __linux__
2311 string line;
2312 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 2313 int lim=std::stoi(line);
4e9a20e6 2314 if(lim < 16384) {
36849ff2 2315 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 2316 }
2317 }
2318#endif
2319}
36849ff2 2320static void checkOrFixFDS()
4e9a20e6 2321{
c0063e60 2322 unsigned int availFDs=getFilenumLimit();
2323 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2324
2325 if(wantFDs > availFDs) {
067ad20e 2326 unsigned int hardlimit= getFilenumLimit(true);
2327 if(hardlimit >= wantFDs) {
c0063e60 2328 setFilenumLimit(wantFDs);
2329 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 2330 }
2331 else {
067ad20e 2332 int newval = (hardlimit - 25) / g_numWorkerThreads;
2333 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 2334 g_maxMThreads = newval;
067ad20e 2335 setFilenumLimit(hardlimit);
36849ff2 2336 }
2337 }
4e9a20e6 2338}
77499b05 2339
bb4bdbaf 2340void* recursorThread(void*);
51e2144e 2341
3427fa8a 2342void* pleaseSupplantACLs(NetmaskGroup *ng)
49a699c4
BH
2343{
2344 t_allowFrom = ng;
3427fa8a 2345 return 0;
49a699c4
BH
2346}
2347
dbd23fc2
BH
2348int g_argc;
2349char** g_argv;
2350
18af64a8 2351void parseACLs()
f7c1d4e3 2352{
18af64a8 2353 static bool l_initialized;
3ddb9247 2354
49a699c4 2355 if(l_initialized) { // only reload configuration file on second call
18af64a8
BH
2356 string configname=::arg()["config-dir"]+"/recursor.conf";
2357 cleanSlashes(configname);
3ddb9247
PD
2358
2359 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 2360 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 2361 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 2362 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
2363 ::arg().preParse(g_argc, g_argv, "include-dir");
2364
2365 // then process includes
2366 std::vector<std::string> extraConfigs;
242b90e1
AT
2367 ::arg().gatherIncludes(extraConfigs);
2368
1dc8f4d0 2369 for(const std::string& fn : extraConfigs) {
7e818521 2370 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2371 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2372 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2373 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2374 }
ca2c884c
AT
2375
2376 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2377 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2378 }
49a699c4
BH
2379
2380 NetmaskGroup* oldAllowFrom = t_allowFrom, *allowFrom=new NetmaskGroup;
3ddb9247 2381
2c95fc65
BH
2382 if(!::arg()["allow-from-file"].empty()) {
2383 string line;
2c95fc65
BH
2384 ifstream ifs(::arg()["allow-from-file"].c_str());
2385 if(!ifs) {
3ddb9247 2386 delete allowFrom;
9c61b9d0 2387 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2388 }
2389
2390 string::size_type pos;
2391 while(getline(ifs,line)) {
2392 pos=line.find('#');
2393 if(pos!=string::npos)
2394 line.resize(pos);
2395 trim(line);
2396 if(line.empty())
2397 continue;
2398
18af64a8 2399 allowFrom->addMask(line);
2c95fc65 2400 }
49a699c4 2401 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2402 }
2403 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2404 vector<string> ips;
2405 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2406
f7c1d4e3
BH
2407 L<<Logger::Warning<<"Only allowing queries from: ";
2408 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2409 allowFrom->addMask(*i);
f7c1d4e3 2410 if(i!=ips.begin())
674cf0f6 2411 L<<Logger::Warning<<", ";
f7c1d4e3
BH
2412 L<<Logger::Warning<<*i;
2413 }
2414 L<<Logger::Warning<<endl;
2415 }
49a699c4 2416 else {
3ddb9247 2417 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
49a699c4
BH
2418 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2419 delete allowFrom;
2420 allowFrom = 0;
2421 }
3ddb9247 2422
49a699c4 2423 g_initialAllowFrom = allowFrom;
d7dae798 2424 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
49a699c4 2425 delete oldAllowFrom;
3ddb9247 2426
49a699c4 2427 l_initialized = true;
18af64a8
BH
2428}
2429
795215f2 2430boost::optional<Netmask> getEDNSSubnetMask(const ComboAddress& local, const DNSName&dn, const ComboAddress& rem)
2431{
2432 if(local.sin4.sin_family != AF_INET || local.sin4.sin_addr.s_addr) { // detect unset 'requestor'
2433 if(g_ednsdomains.check(dn) || g_ednssubnets.match(rem)) {
1353273b 2434 int bits =local.sin4.sin_family == AF_INET ? 24 : 56;
795215f2 2435 ComboAddress trunc(local);
2436 trunc.truncate(bits);
2437 return boost::optional<Netmask>(Netmask(trunc, bits));
2438 }
2439 }
2440 return boost::optional<Netmask>();
2441}
2442
2443void parseEDNSSubnetWhitelist(const std::string& wlist)
2444{
2445 vector<string> parts;
39588f55 2446 stringtok(parts, wlist, ",; ");
795215f2 2447 for(const auto& a : parts) {
2448 try {
2449 Netmask nm(a);
2450 g_ednssubnets.addMask(nm);
2451 }
2452 catch(...) {
2453 g_ednsdomains.add(DNSName(a));
2454 }
2455 }
2456}
2457
756e82cf 2458SuffixMatchNode g_delegationOnly;
2459static void setupDelegationOnly()
2460{
2461 vector<string> parts;
2462 stringtok(parts, ::arg()["delegation-only"], ", \t");
2463 for(const auto& p : parts) {
2464 g_delegationOnly.add(DNSName(p));
2465 }
2466}
795215f2 2467
18af64a8
BH
2468int serviceMain(int argc, char*argv[])
2469{
5124de27 2470 L.setName(s_programname);
18af64a8 2471 L.setLoglevel((Logger::Urgency)(6)); // info and up
b6cfa948 2472 L.disableSyslog(::arg().mustDo("disable-syslog"));
18af64a8
BH
2473
2474 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
2475 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2476 if(val >= 0)
2477 theL().setFacility(val);
18af64a8
BH
2478 else
2479 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2480 }
2481
ba1a571d 2482 showProductVersion();
18af64a8 2483 seedRandom(::arg()["entropy-source"]);
06ea9015 2484 g_disthashseed=dns_random(0xffffffff);
2485
ad42489c 2486 loadRecursorLuaConfig(::arg()["lua-config-file"]);
2487
18af64a8 2488 parseACLs();
92011b8f 2489 sortPublicSuffixList();
2490
eb5bae86
BH
2491 if(!::arg()["dont-query"].empty()) {
2492 g_dontQuery=new NetmaskGroup;
2493 vector<string> ips;
2494 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
2495 ips.push_back("0.0.0.0");
2496 ips.push_back("::");
c36bc97a 2497
eb5bae86
BH
2498 L<<Logger::Warning<<"Will not send queries to: ";
2499 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2500 g_dontQuery->addMask(*i);
2501 if(i!=ips.begin())
4957a608 2502 L<<Logger::Warning<<", ";
eb5bae86
BH
2503 L<<Logger::Warning<<*i;
2504 }
2505 L<<Logger::Warning<<endl;
2506 }
2507
f7c1d4e3 2508 g_quiet=::arg().mustDo("quiet");
3ddb9247 2509
1bc3c142
BH
2510 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2511 if(g_weDistributeQueries) {
2512 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2513 }
3ddb9247 2514
756e82cf 2515 setupDelegationOnly();
b33c2462 2516 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 2517
12ce523e 2518 if(::arg()["dnssec"]=="off")
2519 g_dnssecmode=DNSSECMode::Off;
2520 else if(::arg()["dnssec"]=="process")
2521 g_dnssecmode=DNSSECMode::Process;
2522 else if(::arg()["dnssec"]=="validate")
2523 g_dnssecmode=DNSSECMode::ValidateAll;
2524 else if(::arg()["dnssec"]=="log-fail")
2525 g_dnssecmode=DNSSECMode::ValidateForLog;
2526 else {
2527 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2528 exit(1);
2529 }
2530
77499b05
BH
2531 if(::arg()["trace"]=="fail") {
2532 SyncRes::setDefaultLogMode(SyncRes::Store);
2533 }
2534 else if(::arg().mustDo("trace")) {
2535 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
2536 ::arg().set("quiet")="no";
2537 g_quiet=false;
3e9c6c0a 2538 g_dnssecLOG=true;
f7c1d4e3 2539 }
3ddb9247 2540
aadceba8 2541 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2542
4e9a20e6 2543 checkLinuxIPv6Limits();
5a38281c 2544 try {
3ddb9247 2545 vector<string> addrs;
5a38281c
BH
2546 if(!::arg()["query-local-address6"].empty()) {
2547 SyncRes::s_doIPv6=true;
d4fb76e9 2548 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
3ddb9247 2549
5a38281c 2550 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
1dc8f4d0 2551 for(const string& addr : addrs) {
4957a608 2552 g_localQueryAddresses6.push_back(ComboAddress(addr));
5a38281c
BH
2553 }
2554 }
d4fb76e9
BH
2555 else {
2556 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2557 }
5a38281c
BH
2558 addrs.clear();
2559 stringtok(addrs, ::arg()["query-local-address"], ", ;");
1dc8f4d0 2560 for(const string& addr : addrs) {
5a38281c
BH
2561 g_localQueryAddresses4.push_back(ComboAddress(addr));
2562 }
2563 }
2564 catch(std::exception& e) {
2565 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2566 exit(99);
f7c1d4e3 2567 }
f555e92e 2568
1051f8a9
BH
2569 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2570
f7c1d4e3 2571 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
c3e753c7 2572 SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
1051f8a9 2573 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
2574 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2575 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2576 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
2577 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2578 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 2579 SyncRes::s_serverID=::arg()["server-id"];
173d790e 2580 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 2581 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
01402d56 2582 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
2583 if(SyncRes::s_serverID.empty()) {
2584 char tmp[128];
2585 gethostname(tmp, sizeof(tmp)-1);
2586 SyncRes::s_serverID=tmp;
2587 }
3ddb9247 2588
5b0ddd18 2589 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 2590
49a699c4 2591 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 2592
08f3f638 2593 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 2594
f7c1d4e3 2595 g_logCommonErrors=::arg().mustDo("log-common-errors");
e661a20b
PD
2596
2597 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
2598 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2599
b3adda56
PD
2600 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
2601
f7c1d4e3
BH
2602 makeUDPServerSockets();
2603 makeTCPServerSockets();
815099b2 2604
376effcf 2605 parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
2606
677e2a46
BH
2607 int forks;
2608 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
2609 if(!fork()) // we are child
2610 break;
2611 }
3ddb9247 2612
f7c1d4e3
BH
2613 if(::arg().mustDo("daemon")) {
2614 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
2615 L.toConsole(Logger::Critical);
f7c1d4e3
BH
2616 daemonize();
2617 }
2618 signal(SIGUSR1,usr1Handler);
2619 signal(SIGUSR2,usr2Handler);
2620 signal(SIGPIPE,SIG_IGN);
a6414fdc 2621 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
c0063e60 2622 g_numWorkerThreads = ::arg().asNum("threads");
a6414fdc
AT
2623 g_maxMThreads = ::arg().asNum("max-mthreads");
2624 checkOrFixFDS();
3ddb9247 2625
138435cb
BH
2626 int newgid=0;
2627 if(!::arg()["setgid"].empty())
2628 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
2629 int newuid=0;
2630 if(!::arg()["setuid"].empty())
2631 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
2632
f1d6a7ce
KM
2633 Utility::dropGroupPrivs(newuid, newgid);
2634
138435cb
BH
2635 if (!::arg()["chroot"].empty()) {
2636 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
2637 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
2638 exit(1);
2639 }
f0f3f0b0
PL
2640 else
2641 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
2642 }
2643
f0f3f0b0
PL
2644 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
2645 if(!s_pidfname.empty())
2646 unlink(s_pidfname.c_str()); // remove possible old pid file
2647 writePid();
2648
2649 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
2650
f1d6a7ce 2651 Utility::dropUserPrivs(newuid);
c0063e60 2652
49a699c4 2653 makeThreadPipes();
3ddb9247 2654
5d4dd7fe
BH
2655 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2656 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
343257a4 2657
c3828c03 2658 if(g_numThreads == 1) {
76698c6e 2659 L<<Logger::Warning<<"Operating unthreaded"<<endl;
76698c6e
BH
2660 recursorThread(0);
2661 }
2662 else {
2663 pthread_t tid;
c3828c03
BH
2664 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
2665 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 2666 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
76698c6e
BH
2667 }
2668 void* res;
49a699c4 2669
3ddb9247 2670
76698c6e 2671 pthread_join(tid, &res);
bb4bdbaf 2672 }
bb4bdbaf
BH
2673 return 0;
2674}
2675
2676void* recursorThread(void* ptr)
2677try
2678{
2e2cd8ec 2679 t_id=(int) (long) ptr;
49a699c4 2680 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
ac0e821b 2681 t_sstorage->domainmap = g_initialDomainMap;
49a699c4
BH
2682 t_allowFrom = g_initialAllowFrom;
2683 t_udpclientsocks = new UDPClientSocks();
bd0289fc 2684 t_tcpClientCounts = new tcpClientCounts_t();
49a699c4 2685 primeHints();
3ddb9247 2686
49a699c4 2687 t_packetCache = new RecursorPacketCache();
3ddb9247 2688
aa7929a3
RG
2689#ifdef HAVE_PROTOBUF
2690 t_uuidGenerator = new boost::uuids::random_generator();
2691#endif
49a699c4 2692 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 2693
a3e7b735 2694 t_pdl = new shared_ptr<RecursorLua4>();
3ddb9247 2695
674cf0f6
BH
2696 try {
2697 if(!::arg()["lua-dns-script"].empty()) {
a3e7b735 2698 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(::arg()["lua-dns-script"]));
674cf0f6
BH
2699 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
2700 }
674cf0f6
BH
2701 }
2702 catch(std::exception &e) {
2703 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 2704 _exit(99);
674cf0f6 2705 }
3ddb9247 2706
77499b05 2707 t_traceRegex = new shared_ptr<Regex>();
f8f243b0 2708 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 2709 if(ringsize) {
60c8afa8 2710 t_remotes = new addrringbuf_t();
f8f243b0 2711 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 2712 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 2713 else
3ddb9247 2714 t_remotes->set_capacity(ringsize);
60c8afa8 2715 t_servfailremotes = new addrringbuf_t();
3ddb9247 2716 t_servfailremotes->set_capacity(ringsize);
60c8afa8 2717 t_largeanswerremotes = new addrringbuf_t();
3ddb9247 2718 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 2719
c5c066bf 2720 t_queryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2721 t_queryring->set_capacity(ringsize);
c5c066bf 2722 t_servfailqueryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2723 t_servfailqueryring->set_capacity(ringsize);
92011b8f 2724 }
3ddb9247 2725
bb4bdbaf 2726 MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
3ddb9247 2727
bb4bdbaf
BH
2728 PacketID pident;
2729
2730 t_fdm=getMultiplexer();
f3d1d67b 2731 if(!t_id) {
d07bf7ff 2732 if(::arg().mustDo("webserver")) {
30a1aa92 2733 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 2734 try {
1ce57618 2735 new RecursorWebServer(t_fdm);
8989097d
CH
2736 }
2737 catch(PDNSException &e) {
2738 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
2739 exit(99);
2740 }
f3d1d67b 2741 }
83252304 2742 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 2743 }
83252304 2744
49a699c4 2745 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 2746
1bc3c142 2747 if(!g_weDistributeQueries || !t_id) // if we distribute queries, only t_id = 0 listens
3ddb9247 2748 for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i)
1bc3c142 2749 t_fdm->addReadFD(i->first, i->second);
3ddb9247 2750
674cf0f6 2751 if(!t_id) {
674cf0f6
BH
2752 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
2753 }
1bc3c142 2754
f7c1d4e3 2755 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 2756
f7c1d4e3 2757 bool listenOnTCP(true);
49a699c4 2758
2c78bd57 2759 time_t last_carbon=0;
2760 time_t carbonInterval=::arg().asNum("carbon-interval");
cc59bce6 2761 counter=AtomicCounter(0); // used to periodically execute certain tasks
f7c1d4e3 2762 for(;;) {
ac0e821b 2763 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 2764
3427fa8a
BH
2765 if(!(counter%500)) {
2766 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
2767 }
2768
d2392145 2769 if(!(counter%55)) {
d8f6d49f 2770 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 2771 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 2772
f7c1d4e3 2773 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 2774 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 2775 if(g_logCommonErrors)
cd989c87 2776 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 2777 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
2778 }
2779 }
3ddb9247 2780
f7c1d4e3
BH
2781 counter++;
2782
3427fa8a 2783 if(!t_id && statsWanted) {
f7c1d4e3
BH
2784 doStats();
2785 }
2786
2787 Utility::gettimeofday(&g_now, 0);
2c78bd57 2788
2789 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
2790 MT->makeThread(doCarbonDump, 0);
2791 last_carbon = g_now.tv_sec;
2792 }
2793
bb4bdbaf 2794 t_fdm->run(&g_now);
3ea54bf0 2795 // 'run' updates g_now for us
f7c1d4e3 2796
b8ef5c5c 2797 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 2798 if(listenOnTCP) {
2799 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
2800 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2801 t_fdm->removeReadFD(*i);
2802 listenOnTCP=false;
2803 }
f7c1d4e3 2804 }
5c889cf5 2805 else {
2806 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
2807 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2808 t_fdm->addReadFD(*i, handleNewTCPQuestion);
2809 listenOnTCP=true;
2810 }
f7c1d4e3
BH
2811 }
2812 }
2813 }
2814}
3f81d239 2815catch(PDNSException &ae) {
bb4bdbaf
BH
2816 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2817 return 0;
2818}
2819catch(std::exception &e) {
2820 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2821 return 0;
2822}
2823catch(...) {
2824 L<<Logger::Error<<"any other exception in main: "<<endl;
2825 return 0;
2826}
2827
51e2144e 2828
3ddb9247 2829int main(int argc, char **argv)
288f4aa9 2830{
dbd23fc2
BH
2831 g_argc = argc;
2832 g_argv = argv;
5e3de507 2833 g_stats.startupTime=time(0);
3e135495 2834 versionSetProduct(ProductRecursor);
8a63d3ce 2835 reportBasicTypes();
0007c2e5 2836 reportOtherTypes();
ea634573 2837
22030c37 2838 int ret = EXIT_SUCCESS;
caa6eefa 2839
288f4aa9 2840 try {
f888311c 2841 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 2842 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 2843 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 2844 ::arg().set("local-port","port to listen on")="53";
32252594 2845 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 2846 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 2847 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
23b91202 2848 ::arg().set("dnssec", "DNSSEC mode: off/process (default)/log-fail/validate")="process";
d3f809bf 2849 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 2850 ::arg().setSwitch("write-pid","Write a PID file")="yes";
34162f8f 2851 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="4";
b6cfa948 2852 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
0e9d9ce2 2853 ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2e3d8a19
BH
2854 ::arg().set("chroot","switch to chroot jail")="";
2855 ::arg().set("setgid","If set, change group id to this gid for more security")="";
2856 ::arg().set("setuid","If set, change user id to this uid for more security")="";
5b0ddd18 2857 ::arg().set("network-timeout", "Wait this nummer of milliseconds for network i/o")="1500";
bb4bdbaf 2858 ::arg().set("threads", "Launch this number of threads")="2";
1bc3c142 2859 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1";
5124de27 2860 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 2861 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976
CH
2862 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
2863 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
2864 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
2865 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
2866 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
2867 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
2868 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
69e7f117 2869 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
cc08b5a9 2870 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
2c78bd57 2871 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server")="";
2872 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
c038218b 2873 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 2874 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 2875 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
2876 ::arg().set("socket-owner","Owner of socket")="";
2877 ::arg().set("socket-group","Group of socket")="";
2878 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 2879
f0f3f0b0 2880 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
2881 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
2882 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 2883 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 2884 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 2885 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 2886 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 2887 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 2888 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 2889 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 2890 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 2891 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 2892 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 2893 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 2894 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 2895 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
7f7b8d55 2896 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
92011b8f 2897 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 2898 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 2899 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 2900 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 2901 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 2902 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 2903 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
0d5f0a9f 2904 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 2905 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 2906 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 2907 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 2908
5605c067 2909 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
2910 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
2911 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 2912 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 2913 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 2914 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
9bc8c14c 2915 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
4485aa35 2916 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 2917 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 2918 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3f975863 2919 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
966d3ba8 2920 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="";
cd6310a8 2921 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="no";
e661a20b 2922 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 2923 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
a09a8ce0 2924 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 2925 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 2926 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 2927 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 2928 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
a09a8ce0 2929
68e6df3c 2930 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 2931 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 2932
2933 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19
BH
2934
2935 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 2936 ::arg().setCmd("version","Print version string");
d5141417 2937 ::arg().setCmd("config","Output blank configuration");
f27e6356 2938 L.toConsole(Logger::Info);
2e3d8a19 2939 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 2940
2d733c0f
CH
2941 string configname=::arg()["config-dir"]+"/recursor.conf";
2942 if(::arg()["config-name"]!="") {
2943 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 2944 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
2945 }
2946 cleanSlashes(configname);
5124de27 2947
577cf284
BH
2948 if(::arg().mustDo("config")) {
2949 cout<<::arg().configstring()<<endl;
2950 exit(0);
2951 }
2952
3ddb9247 2953 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
2954 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
2955
2e3d8a19 2956 ::arg().parse(argc,argv);
c836dc19 2957
f0f3f0b0
PL
2958 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
2959 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
2960 exit(EXIT_FAILURE);
2961 }
2962
2963 if (::arg()["socket-dir"].empty()) {
2964 if (::arg()["chroot"].empty())
2965 ::arg().set("socket-dir") = LOCALSTATEDIR;
2966 else
2967 ::arg().set("socket-dir") = "/";
2968 }
2969
2e3d8a19 2970 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 2971
61d74169 2972 if(::arg().asNum("threads")==1)
2973 ::arg().set("pdns-distributes-queries")="no";
2974
2e3d8a19 2975 if(::arg().mustDo("help")) {
ff5ba4f9
WA
2976 cout<<"syntax:"<<endl<<endl;
2977 cout<<::arg().helpstring(::arg()["help"])<<endl;
2978 exit(0);
b636533b 2979 }
5e3de507 2980 if(::arg().mustDo("version")) {
ba1a571d 2981 showProductVersion();
3613a51c 2982 showBuildConfiguration();
67076869 2983 exit(0);
5e3de507 2984 }
b636533b 2985
34162f8f 2986 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 2987
34162f8f
CH
2988 if (logUrgency < Logger::Error)
2989 logUrgency = Logger::Error;
f48d7b65 2990 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
2991 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
2992 }
34162f8f
CH
2993 L.setLoglevel(logUrgency);
2994 L.toConsole(logUrgency);
2995
f7c1d4e3 2996 serviceMain(argc, argv);
288f4aa9 2997 }
3f81d239 2998 catch(PDNSException &ae) {
c836dc19 2999 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 3000 ret=EXIT_FAILURE;
288f4aa9 3001 }
fdbf35ac 3002 catch(std::exception &e) {
c836dc19 3003 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 3004 ret=EXIT_FAILURE;
288f4aa9
BH
3005 }
3006 catch(...) {
c836dc19 3007 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 3008 ret=EXIT_FAILURE;
288f4aa9 3009 }
3ddb9247 3010
22030c37 3011 return ret;
288f4aa9 3012}