]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
Merge pull request #5163 from rgacogne/dnsdist-cleanup-tcp-downstreams
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
fa8fd4d2 29
2470b36e 30#include "ws-recursor.hh"
49a699c4 31#include <pthread.h>
3ea54bf0 32#include "recpacketcache.hh"
3ddb9247 33#include "utility.hh"
51e2144e 34#include "dns_random.hh"
d1b28475
KM
35#ifdef HAVE_LIBSODIUM
36#include <sodium.h>
37#endif
3afde9b2 38#include "opensslsigners.hh"
288f4aa9
BH
39#include <iostream>
40#include <errno.h>
81859ba5 41#include <boost/static_assert.hpp>
288f4aa9
BH
42#include <map>
43#include <set>
97bb160b 44#include "recursor_cache.hh"
38c9ceaa 45#include "cachecleaner.hh"
288f4aa9 46#include <stdio.h>
c75a6a9e 47#include <signal.h>
288f4aa9 48#include <stdlib.h>
bb4bdbaf 49#include "misc.hh"
288f4aa9
BH
50#include "mtasker.hh"
51#include <utility>
288f4aa9
BH
52#include "arguments.hh"
53#include "syncres.hh"
88def049
BH
54#include <fcntl.h>
55#include <fstream>
3e61e7f7 56#include "sortlist.hh"
57extern SortList g_sortlist;
5c633640
BH
58#include "sstuff.hh"
59#include <boost/tuple/tuple.hpp>
60#include <boost/tuple/tuple_comparison.hpp>
72df400f 61#include <boost/shared_array.hpp>
7f1fa77d 62#include <boost/function.hpp>
5605c067 63#include <boost/algorithm/string.hpp>
8f7473d7 64#ifdef MALLOC_TRACE
65#include "malloctrace.hh"
66#endif
40a3dd64 67#include <netinet/tcp.h>
ea634573
BH
68#include "dnsparser.hh"
69#include "dnswriter.hh"
70#include "dnsrecords.hh"
f814d7c8 71#include "zoneparser-tng.hh"
1d5b3ce6 72#include "rec_channel.hh"
aaacf7f2 73#include "logger.hh"
c8ddb7c2 74#include "iputils.hh"
09e6702a 75#include "mplexer.hh"
c038218b 76#include "config.h"
808c5ef7 77#include "lua-recursor4.hh"
ba1a571d 78#include "version.hh"
79332bff 79#include "responsestats.hh"
d67620e4 80#include "secpoll-recursor.hh"
c5c066bf 81#include "dnsname.hh"
644dd1da 82#include "filterpo.hh"
83#include "rpzloader.hh"
b3f0ed10 84#include "validate-recursor.hh"
f3c18728 85#include "rec-lua-conf.hh"
5c3b5e7f 86#include "ednsoptions.hh"
85c7ca75 87#include "gettime.hh"
f3c18728 88
d9d3f9c1 89#include "rec-protobuf.hh"
d705aad9 90#include "rec-snmp.hh"
aa7929a3 91
6b6720de
PL
92#ifdef HAVE_SYSTEMD
93#include <systemd/sd-daemon.h>
94#endif
95
d187038c
RG
96#include "namespaces.hh"
97
98typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
99
100static __thread shared_ptr<RecursorLua4>* t_pdl;
b4015453 101static __thread unsigned int t_id;
d187038c
RG
102static __thread shared_ptr<Regex>* t_traceRegex;
103static __thread tcpClientCounts_t* t_tcpClientCounts;
60c8afa8 104
d187038c
RG
105__thread MT_t* MT; // the big MTasker
106__thread MemRecursorCache* t_RC;
107__thread RecursorPacketCache* t_packetCache;
108__thread FDMultiplexer* t_fdm;
60c8afa8 109__thread addrringbuf_t* t_remotes, *t_servfailremotes, *t_largeanswerremotes;
c5c066bf 110__thread boost::circular_buffer<pair<DNSName, uint16_t> >* t_queryring, *t_servfailqueryring;
d187038c 111__thread NetmaskGroup* t_allowFrom;
aa7929a3
RG
112#ifdef HAVE_PROTOBUF
113__thread boost::uuids::random_generator* t_uuidGenerator;
114#endif
d187038c 115__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798
BH
116
117// for communicating with our threads
49a699c4
BH
// Four descriptors used to talk to one thread.
// NOTE(review): presumably these are the two ends of a "to thread" pipe and
// the two ends of a "from thread" pipe -- confirm where g_pipes is filled in.
struct ThreadPipeSet
{
  int writeToThread;   // write side, towards the thread
  int readToThread;    // read side, towards the thread
  int writeFromThread; // write side, coming from the thread
  int readFromThread;  // read side, coming from the thread
};
810ff705 125
d187038c
RG
126typedef vector<int> tcpListenSockets_t;
127typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
128typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
3ea54bf0 129
d187038c
RG
130static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
131static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
132static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
133static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
810ff705 134static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
d187038c
RG
135static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
136static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
137static AtomicCounter counter;
138static SyncRes::domainmap_t* g_initialDomainMap; // new threads needs this to be setup
49a699c4 139static NetmaskGroup* g_initialAllowFrom; // new thread needs to be setup with this
d187038c
RG
140static size_t g_tcpMaxQueriesPerConn;
141static uint64_t g_latencyStatSize;
142static uint32_t g_disthashseed;
143static unsigned int g_maxTCPPerClient;
144static unsigned int g_networkTimeoutMsec;
145static unsigned int g_maxMThreads;
146static unsigned int g_numWorkerThreads;
147static int g_tcpTimeout;
148static uint16_t g_udpTruncationThreshold;
149static std::atomic<bool> statsWanted;
150static std::atomic<bool> g_quiet;
151static bool g_logCommonErrors;
152static bool g_anyToTcp;
153static bool g_lowercaseOutgoing;
154static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
810ff705
RG
155static bool g_reusePort{false};
156static bool g_useOneSocketPerThread;
49a699c4 157
d187038c
RG
158std::unordered_set<DNSName> g_delegationOnly;
159RecursorControlChannel s_rcc; // only active in thread 0
160RecursorStats g_stats;
eb5bae86 161NetmaskGroup* g_dontQuery;
2d733c0f 162string s_programname="pdns_recursor";
d187038c
RG
163string s_pidfname;
164unsigned int g_numThreads;
165uint16_t g_outgoingEDNSBufsize;
c3828c03 166
12cd44ee 167#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
168// Bad Nets taken from both:
3ddb9247 169// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 170// and
171// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
172// where such a network may not be considered a valid destination
173#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
174#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 175
d7dae798 176//! used to send information to a newborn mthread
ea634573 177struct DNSComboWriter {
27c0050c 178 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
232f0877 179 d_tcp(false), d_socket(-1)
ea634573
BH
180 {}
181 MOADNSParser d_mdp;
00c9b8c1 182 void setRemote(const ComboAddress* sa)
ea634573 183 {
37d3f960 184 d_remote=*sa;
ea634573
BH
185 }
186
b71b60ee 187 void setLocal(const ComboAddress& sa)
188 {
189 d_local=sa;
190 }
191
192
ea634573
BH
193 void setSocket(int sock)
194 {
195 d_socket=sock;
196 }
a1754c6a
BH
197
198 string getRemote() const
199 {
37d3f960 200 return d_remote.toString();
a1754c6a
BH
201 }
202
c9e9e5e0 203 struct timeval d_now;
b71b60ee 204 ComboAddress d_remote, d_local;
aa7929a3
RG
205#ifdef HAVE_PROTOBUF
206 boost::uuids::uuid d_uuid;
207#endif
b40562da
RG
208 EDNSSubnetOpts d_ednssubnet;
209 bool d_ecsFound{false};
210 bool d_ecsParsed{false};
ea634573
BH
211 bool d_tcp;
212 int d_socket;
b673817a 213 unsigned int d_tag{0};
e9f63d47 214 uint32_t d_qhash{0};
49a3500d 215 string d_query;
cd989c87 216 shared_ptr<TCPConnection> d_tcpConnection;
e8340d27 217 vector<pair<uint16_t, string> > d_ednsOpts;
02b47f43 218 std::vector<std::string> d_policyTags;
5fd2577f 219 LuaContext::LuaObject d_data;
ea634573
BH
220};
221
222
288f4aa9
BH
223ArgvMap &arg()
224{
225 static ArgvMap theArg;
226 return theArg;
227}
4ef015cd 228
b4015453
RG
229unsigned int getRecursorThreadId()
230{
231 return t_id;
232}
09e6702a 233
d187038c 234static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 235
50c81227 236// -1 is error, 0 is timeout, 1 is success
3ddb9247 237int asendtcp(const string& data, Socket* sock)
5c633640
BH
238{
239 PacketID pident;
240 pident.sock=sock;
241 pident.outMSG=data;
3ddb9247 242
bb4bdbaf 243 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 244 string packet;
5c633640 245
5b0ddd18 246 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 247
9170fbaf 248 if(!ret || ret==-1) { // timeout
bb4bdbaf 249 t_fdm->removeWriteFD(sock->getHandle());
5c633640 250 }
50c81227
BH
251 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
252 return -1;
253 }
9170fbaf 254 return ret;
5c633640
BH
255}
256
d187038c 257static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 258
9170fbaf 259// -1 is error, 0 is timeout, 1 is success
a683e8bd 260int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 261{
50c81227 262 data.clear();
5c633640
BH
263 PacketID pident;
264 pident.sock=sock;
265 pident.inNeeded=len;
825fa717 266 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 267 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 268
bb4bdbaf 269 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 270 if(!ret || ret==-1) { // timeout
bb4bdbaf 271 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 272 }
50c81227
BH
273 else if(data.empty()) {// error, EOF or other
274 return -1;
275 }
276
9170fbaf 277 return ret;
288f4aa9
BH
278}
279
d187038c 280static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 281{
fba1e944 282 PacketID pident=*any_cast<PacketID>(&var);
4465e941 283 char resp[512];
a683e8bd 284 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
4465e941 285 t_fdm->removeReadFD(fd);
286 if(ret >= 0) {
a683e8bd 287 string data(resp, (size_t) ret);
fba1e944 288 MT->sendEvent(pident, &data);
4465e941 289 }
290 else {
fba1e944 291 string empty;
292 MT->sendEvent(pident, &empty);
293 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 294 }
295}
fba1e944 296string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 297{
4465e941 298 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
299 s.setNonBlocking();
300 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
301
302 s.bind(local);
303 s.connect(dest);
4465e941 304 s.send(query);
305
306 PacketID pident;
307 pident.sock=&s;
308 pident.type=0;
fba1e944 309 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 310
311 string data;
fba1e944 312
4465e941 313 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 314
4465e941 315 if(!ret || ret==-1) { // timeout
4465e941 316 t_fdm->removeReadFD(s.getHandle());
317 }
318 else if(data.empty()) {// error, EOF or other
fba1e944 319 // we could special case this
4465e941 320 return data;
321 }
4465e941 322 return data;
323}
324
d7dae798 325//! pick a random query local address
1652a63e 326ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 327{
1652a63e 328 ComboAddress ret;
5a38281c 329 if(family==AF_INET) {
3ddb9247 330 if(g_localQueryAddresses4.empty())
1652a63e 331 ret = g_local4;
3ddb9247 332 else
1652a63e
BH
333 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
334 ret.sin4.sin_port = htons(port);
5a38281c
BH
335 }
336 else {
337 if(g_localQueryAddresses6.empty())
1652a63e
BH
338 ret = g_local6;
339 else
340 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 341
1652a63e 342 ret.sin6.sin6_port = htons(port);
5a38281c 343 }
1652a63e 344 return ret;
5a38281c 345}
4ef015cd 346
d187038c 347static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 348
d187038c 349static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
350{
351 uint32_t psize=0;
352 socklen_t len=sizeof(psize);
3ddb9247 353
d7dae798
BH
354 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
355 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 356 return;
d7dae798
BH
357 }
358
359 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
c057bfaa 360 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
361}
362
363
364static void setSocketReceiveBuffer(int fd, uint32_t size)
365{
366 setSocketBuffer(fd, SO_RCVBUF, size);
367}
368
369static void setSocketSendBuffer(int fd, uint32_t size)
370{
371 setSocketBuffer(fd, SO_SNDBUF, size);
372}
373
374
4ef015cd
BH
375// you can ask this class for a UDP socket to send a query from
376// this socket is not yours, don't even think about deleting it
377// but after you call 'returnSocket' on it, don't assume anything anymore
378class UDPClientSocks
379{
4ef015cd 380 unsigned int d_numsocks;
4ef015cd 381public:
e2642526 382 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
383 {
384 }
385
996c89cc 386 typedef set<int> socks_t;
4ef015cd
BH
387 socks_t d_socks;
388
2ee280cf 389 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 390 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 391 {
d8f6d49f
BH
392 *fd=makeClientSocket(toaddr.sin4.sin_family);
393 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 394 return -2;
d8f6d49f
BH
395
396 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
397 int err = errno;
41ff43f8 398 // returnSocket(*fd);
a7b68ae7
RG
399 try {
400 closesocket(*fd);
401 }
402 catch(const PDNSException& e) {
403 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
404 }
405
d8f6d49f 406 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 407 return -2;
998a4334 408 return -1;
d8f6d49f 409 }
998a4334 410
d8f6d49f 411 d_socks.insert(*fd);
998a4334 412 d_numsocks++;
d8f6d49f 413 return 0;
4ef015cd
BH
414 }
415
095c3045
BH
416 void returnSocket(int fd)
417 {
418 socks_t::iterator i=d_socks.find(fd);
34801ab1 419 if(i==d_socks.end()) {
335da0ba 420 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 421 }
bb4bdbaf 422 returnSocketLocked(i);
095c3045
BH
423 }
424
4ef015cd 425 // return a socket to the pool, or simply erase it
bb4bdbaf 426 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 427 {
600fc20b 428 if(i==d_socks.end()) {
3f81d239 429 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 430 }
80baf329 431 try {
bb4bdbaf 432 t_fdm->removeReadFD(*i);
80baf329
BH
433 }
434 catch(FDMultiplexerException& e) {
bb4bdbaf 435 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 436 }
a7b68ae7
RG
437 try {
438 closesocket(*i);
439 }
440 catch(const PDNSException& e) {
441 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
442 }
3ddb9247 443
998a4334
BH
444 d_socks.erase(i++);
445 --d_numsocks;
4ef015cd 446 }
d8f6d49f
BH
447
448 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 449 static int makeClientSocket(int family)
d8f6d49f 450 {
a683e8bd 451 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 452
d8f6d49f
BH
453 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
454 return ret;
3ddb9247
PD
455
456 if(ret<0)
335da0ba 457 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 458
7eb73ffa 459 // setCloseOnExec(ret); // we're not going to exec
5a38281c 460
d8f6d49f 461 int tries=10;
3aa91c3e 462 ComboAddress sin;
d8f6d49f 463 while(--tries) {
1652a63e 464 uint16_t port;
3ddb9247 465
d8f6d49f 466 if(tries==1) // fall back to kernel 'random'
4957a608 467 port = 0;
1652a63e
BH
468 else
469 port = 1025 + dns_random(64510);
5a38281c 470
3aa91c3e 471 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 472
3ddb9247 473 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 474 break;
d8f6d49f
BH
475 }
476 if(!tries)
3aa91c3e 477 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 478
3897b9e1 479 setNonBlocking(ret);
d8f6d49f
BH
480 return ret;
481 }
49a699c4
BH
482};
483
484static __thread UDPClientSocks* t_udpclientsocks;
4ef015cd 485
288f4aa9 486/* these two functions are used by LWRes */
34801ab1 487// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 488int asendto(const char *data, size_t len, int flags,
3ddb9247 489 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 490{
34801ab1
BH
491
492 PacketID pident;
787e5eab
BH
493 pident.domain = domain;
494 pident.remote = toaddr;
495 pident.type = qtype;
34801ab1
BH
496
497 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
498 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
499
500 for(; chain.first != chain.second; chain.first++) {
501 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 502 /*
4665c31e
BH
503 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
504 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 505 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 506 */
34801ab1
BH
507 chain.first->key.chain.insert(id); // we can chain
508 *fd=-1; // gets used in waitEvent / sendEvent later on
509 return 1;
510 }
511 }
512
49a699c4 513 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
514 if(ret < 0)
515 return ret;
34801ab1 516
998a4334
BH
517 pident.fd=*fd;
518 pident.id=id;
3ddb9247 519
bb4bdbaf
BH
520 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
521 ret = send(*fd, data, len, 0);
522
5b0ddd18 523 int tmp = errno;
bb4bdbaf 524
7302ed0a 525 if(ret < 0)
49a699c4 526 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 527
5b0ddd18 528 errno = tmp; // this is for logging purposes only
7302ed0a 529 return ret;
288f4aa9
BH
530}
531
9170fbaf 532// -1 is error, 0 is timeout, 1 is success
a683e8bd 533int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 534 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 535{
0d5f0a9f 536 static optional<unsigned int> nearMissLimit;
3ddb9247 537 if(!nearMissLimit)
0d5f0a9f
BH
538 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
539
288f4aa9 540 PacketID pident;
4ef015cd 541 pident.fd=fd;
288f4aa9 542 pident.id=id;
0d5f0a9f 543 pident.domain=domain;
787e5eab 544 pident.type = qtype;
996c89cc 545 pident.remote=fromaddr;
b636533b 546
288f4aa9 547 string packet;
5b0ddd18 548 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 549
9170fbaf 550 if(ret > 0) {
996c89cc 551 if(packet.empty()) // means "error"
3ddb9247 552 return -1;
998a4334 553
a683e8bd 554 *d_len=packet.size();
9170fbaf 555 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 556 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
996c89cc 557 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 558 g_stats.spoofCount++;
35ce8576
BH
559 return -1;
560 }
288f4aa9 561 }
09e6702a 562 else {
34801ab1 563 if(fd >= 0)
49a699c4 564 t_udpclientsocks->returnSocket(fd);
09e6702a 565 }
9170fbaf 566 return ret;
288f4aa9
BH
567}
568
88def049
BH
569static void writePid(void)
570{
191f2e47 571 if(!::arg().mustDo("write-pid"))
572 return;
18e7758c 573 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 574 if(of)
705f31ae 575 of<< Utility::getpid() <<endl;
88def049 576 else
c057bfaa 577 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
578}
579
cd989c87 580TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
581{
582 ++s_currentConnections;
cd989c87 583 (*t_tcpClientCounts)[d_remote]++;
0e408828 584}
cd989c87
BH
585
586TCPConnection::~TCPConnection()
0e408828 587{
a7b68ae7
RG
588 try {
589 if(closesocket(d_fd) < 0)
590 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
591 }
592 catch(const PDNSException& e) {
593 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
594 }
595
3ddb9247 596 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 597 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 598 --s_currentConnections;
0e408828 599}
0e9d9ce2 600
3ddb9247 601AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
602
603static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 604
92011b8f 605// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 606static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 607{
92011b8f 608 if(packetsize > 1000 && t_largeanswerremotes)
609 t_largeanswerremotes->push_back(remote);
2cc13433
BH
610 switch(res) {
611 case RCode::ServFail:
92011b8f 612 if(t_servfailremotes) {
613 t_servfailremotes->push_back(remote);
5af86fdc 614 if(query && t_servfailqueryring) // packet cache
92011b8f 615 t_servfailqueryring->push_back(make_pair(*query, qtype));
616 }
2cc13433
BH
617 g_stats.servFails++;
618 break;
619 case RCode::NXDomain:
620 g_stats.nxDomains++;
621 break;
622 case RCode::NoError:
623 g_stats.noErrors++;
624 break;
625 }
626}
627
a903b39c 628static string makeLoginfo(DNSComboWriter* dc)
629try
630{
5ad5bb7d 631 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
a903b39c 632}
633catch(...)
634{
635 return "Exception making error message for exception";
636}
637
aa7929a3 638#ifdef HAVE_PROTOBUF
07ebe7c6 639static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags)
aa7929a3 640{
e1c8a4bb
RG
641 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
642 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
643 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
a94bc5d7 644 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
02b47f43 645
02b47f43 646 if (!policyTags.empty()) {
d9d3f9c1 647 message.setPolicyTags(policyTags);
02b47f43 648 }
aa7929a3 649
d9d3f9c1 650// cerr <<message.toDebugString()<<endl;
aa7929a3 651 std::string str;
d9d3f9c1 652 message.serialize(str);
aa7929a3 653 logger->queueData(str);
aa7929a3
RG
654}
655
d9d3f9c1 656static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
aa7929a3 657{
d9d3f9c1 658// cerr <<message.toDebugString()<<endl;
aa7929a3 659 std::string str;
d9d3f9c1 660 message.serialize(str);
aa7929a3 661 logger->queueData(str);
aa7929a3
RG
662}
663#endif
664
53508135
PL
665/**
666 * Chases the CNAME provided by the PolicyCustom RPZ policy.
667 *
668 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
669 * @param qtype: The QType of the original query
670 * @param sr: A SyncRes
671 * @param res: An integer that will contain the RCODE of the lookup we do
672 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
673 */
d187038c 674static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
675{
676 if (spoofed.d_type == QType::CNAME) {
677 bool oldWantsRPZ = sr.d_wantsRPZ;
678 sr.d_wantsRPZ = false;
679 vector<DNSRecord> ans;
680 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
681 for (const auto& rec : ans) {
682 if(rec.d_place == DNSResourceRecord::ANSWER) {
683 ret.push_back(rec);
684 }
685 }
686 // Reset the RPZ state of the SyncRes
687 sr.d_wantsRPZ = oldWantsRPZ;
688 }
689}
690
d187038c 691static void startDoResolve(void *p)
288f4aa9 692{
7b1469bb 693 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 694 try {
5af86fdc
RG
695 if (t_queryring)
696 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 697
b18ace73 698 uint32_t maxanswersize= dc->d_tcp ? 65535 : min((uint16_t) 512, g_udpTruncationThreshold);
7f7b8d55 699 EDNSOpts edo;
8e079f3a 700 bool haveEDNS=false;
701 if(getEDNSOpts(dc->d_mdp, &edo)) {
702 if(!dc->d_tcp)
703 maxanswersize = min(edo.d_packetsize, g_udpTruncationThreshold);
e8340d27 704 dc->d_ednsOpts = edo.d_options;
8e079f3a 705 haveEDNS=true;
b40562da
RG
706
707 if (g_useIncomingECS && !dc->d_ecsParsed) {
708 for (const auto& o : edo.d_options) {
709 if (o.first == EDNSOptionCode::ECS) {
710 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
711 break;
712 }
713 }
714 }
10321a98 715 }
b40562da
RG
716 /* perhaps there was no EDNS or no ECS but by now we looked */
717 dc->d_ecsParsed = true;
e325f20c 718 vector<DNSRecord> ret;
ea634573 719 vector<uint8_t> packet;
b23b8614 720
ad42489c 721 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
722 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
723 bool wantsRPZ(true);
d9d3f9c1 724 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
aa7929a3 725#ifdef HAVE_PROTOBUF
d9d3f9c1 726 if (luaconfsLocal->protobufServer) {
e1c8a4bb
RG
727 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
728 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
729 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
b40562da 730 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
d9d3f9c1
RG
731 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
732 }
733#endif /* HAVE_PROTOBUF */
ad42489c 734
3ddb9247 735 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
736
737 pw.getHeader()->aa=0;
738 pw.getHeader()->ra=1;
c154c8a4 739 pw.getHeader()->qr=1;
bb4bdbaf 740 pw.getHeader()->tc=0;
ea634573 741 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 742 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 743 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 744
1059837e 745 // DO NOT MOVE THIS CODE UP - DNSPacketWriter needs to get the original-cased version
b3adda56 746 if (g_lowercaseOutgoing)
3ebc80ce 747 dc->d_mdp.d_qname = dc->d_mdp.d_qname.makeLowerCase();
b3adda56 748
904d3219
PD
749 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
750
751 SyncRes sr(dc->d_now);
2e921ec6 752 bool DNSSECOK=false;
3457a2a0 753 if(t_pdl) {
754 sr.setLuaEngine(*t_pdl);
4ea94941 755 sr.d_requestor=dc->d_remote;
3457a2a0 756 }
2e921ec6 757
9eec8c98 758 if(g_dnssecmode != DNSSECMode::Off) {
2e921ec6 759 sr.d_doDNSSEC=true;
9eec8c98
PL
760
761 // Does the requestor want DNSSEC records?
762 if(edo.d_Z & EDNSOpts::DNSSECOK) {
763 DNSSECOK=true;
764 g_stats.dnssecQueries++;
765 }
766 } else {
767 // Ignore the client-set CD flag
768 pw.getHeader()->cd=0;
5b9853c9 769 }
4898a348
RG
770#ifdef HAVE_PROTOBUF
771 sr.d_initialRequestId = dc->d_uuid;
772#endif
b40562da
RG
773 if (g_useIncomingECS) {
774 sr.d_incomingECSFound = dc->d_ecsFound;
775 if (dc->d_ecsFound) {
776 sr.d_incomingECS = dc->d_ednssubnet;
777 }
778 }
57769f13 779
904d3219
PD
780 bool tracedQuery=false; // we could consider letting Lua know about this too
781 bool variableAnswer = false;
9fc36e90 782 bool shouldNotValidate = false;
904d3219 783
ef3b6cd7
RG
784 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
785 int res = RCode::NoError;
1f1ca368 786 DNSFilterEngine::Policy appliedPolicy;
39ec5d29 787 DNSRecord spoofed;
6e505c5e
RG
788 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
789 dq.ednsFlags = &edo.d_Z;
790 dq.ednsOptions = &dc->d_ednsOpts;
791 dq.tag = dc->d_tag;
792 dq.discardedPolicies = &sr.d_discardedPolicies;
793 dq.policyTags = &dc->d_policyTags;
794 dq.appliedPolicy = &appliedPolicy;
795 dq.currentRecords = &ret;
796 dq.dh = &dc->d_mdp.d_header;
05c74122 797 dq.data = dc->d_data;
ba21fcfe 798
e661a20b 799 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
800 pw.getHeader()->tc = 1;
801 res = 0;
802 variableAnswer = true;
e661a20b
PD
803 goto sendit;
804 }
805
c5c066bf 806 if(t_traceRegex->get() && (*t_traceRegex)->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
807 sr.setLogMode(SyncRes::Store);
808 tracedQuery=true;
809 }
3ddb9247 810
8f7473d7 811
976ec823 812 if(!g_quiet || tracedQuery) {
461df9d2 813 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 814 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da
RG
815 if(!dc->d_ednssubnet.source.empty()) {
816 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 817 }
976ec823 818 L<<endl;
819 }
c75a6a9e 820
fededf47 821 sr.setId(MT->getTid());
67828389 822 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
823 sr.setCacheOnly();
824
0a273054 825 if (t_pdl->get()) {
ba21fcfe 826 (*t_pdl)->prerpz(dq, res);
0a273054
RG
827 }
828
db486de5 829 // Check if the query has a policy attached to it
0a273054 830 if (wantsRPZ) {
1f1ca368 831 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
0a273054 832 }
644dd1da 833
54be222b 834 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
ba21fcfe 835 if(!t_pdl->get() || !(*t_pdl)->preresolve(dq, res)) {
b8470add
PL
836
837 sr.d_wantsRPZ = wantsRPZ;
838 if(wantsRPZ) {
839 switch(appliedPolicy.d_kind) {
840 case DNSFilterEngine::PolicyKind::NoAction:
841 break;
842 case DNSFilterEngine::PolicyKind::Drop:
843 g_stats.policyDrops++;
7a25883a 844 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
845 delete dc;
846 dc=0;
847 return;
848 case DNSFilterEngine::PolicyKind::NXDOMAIN:
849 g_stats.policyResults[appliedPolicy.d_kind]++;
850 res=RCode::NXDomain;
851 goto haveAnswer;
852 case DNSFilterEngine::PolicyKind::NODATA:
853 g_stats.policyResults[appliedPolicy.d_kind]++;
854 res=RCode::NoError;
db486de5 855 goto haveAnswer;
b8470add
PL
856 case DNSFilterEngine::PolicyKind::Custom:
857 g_stats.policyResults[appliedPolicy.d_kind]++;
858 res=RCode::NoError;
859 spoofed.d_name=dc->d_mdp.d_qname;
860 spoofed.d_type=appliedPolicy.d_custom->getType();
861 spoofed.d_ttl = appliedPolicy.d_ttl;
862 spoofed.d_class = 1;
863 spoofed.d_content = appliedPolicy.d_custom;
864 spoofed.d_place = DNSResourceRecord::ANSWER;
865 ret.push_back(spoofed);
53508135 866 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
867 goto haveAnswer;
868 case DNSFilterEngine::PolicyKind::Truncate:
869 if(!dc->d_tcp) {
870 g_stats.policyResults[appliedPolicy.d_kind]++;
871 res=RCode::NoError;
872 pw.getHeader()->tc=1;
873 goto haveAnswer;
874 }
875 break;
876 }
db486de5
PL
877 }
878
b8470add 879 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
880 try {
881 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 882 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
883 }
884 catch(ImmediateServFailException &e) {
854d44e3 885 if(g_logCommonErrors)
886 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
887 res = RCode::ServFail;
888 }
4485aa35 889
b8470add
PL
890 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
891 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
892 appliedPolicy = sr.d_appliedPolicy;
893 g_stats.policyResults[appliedPolicy.d_kind]++;
894 switch(appliedPolicy.d_kind) {
895 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
896 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
897 case DNSFilterEngine::PolicyKind::Drop:
898 g_stats.policyDrops++;
899 delete dc;
900 dc=0;
901 return;
902 case DNSFilterEngine::PolicyKind::NXDOMAIN:
903 ret.clear();
904 res=RCode::NXDomain;
905 goto haveAnswer;
906
907 case DNSFilterEngine::PolicyKind::NODATA:
908 ret.clear();
909 res=RCode::NoError;
910 goto haveAnswer;
911
912 case DNSFilterEngine::PolicyKind::Truncate:
913 if(!dc->d_tcp) {
914 ret.clear();
915 res=RCode::NoError;
916 pw.getHeader()->tc=1;
917 goto haveAnswer;
918 }
919 break;
920
921 case DNSFilterEngine::PolicyKind::Custom:
922 ret.clear();
923 res=RCode::NoError;
924 spoofed.d_name=dc->d_mdp.d_qname;
925 spoofed.d_type=appliedPolicy.d_custom->getType();
926 spoofed.d_ttl = appliedPolicy.d_ttl;
927 spoofed.d_class = 1;
928 spoofed.d_content = appliedPolicy.d_custom;
929 spoofed.d_place = DNSResourceRecord::ANSWER;
930 ret.push_back(spoofed);
53508135 931 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
932 goto haveAnswer;
933 }
934 }
935
936 if (wantsRPZ) {
1f1ca368 937 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 938 }
db486de5
PL
939
940 if(t_pdl->get()) {
941 if(res == RCode::NoError) {
942 auto i=ret.cbegin();
943 for(; i!= ret.cend(); ++i)
944 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
945 break;
ba21fcfe 946 if(i == ret.cend() && (*t_pdl)->nodata(dq, res))
3ca4e735
PL
947 shouldNotValidate = true;
948
db486de5 949 }
ba21fcfe 950 else if(res == RCode::NXDomain && (*t_pdl)->nxdomain(dq, res))
3ca4e735 951 shouldNotValidate = true;
db486de5 952
ba21fcfe 953 if((*t_pdl)->postresolve(dq, res))
3ca4e735 954 shouldNotValidate = true;
db486de5
PL
955 }
956
b8470add
PL
957 if (wantsRPZ) { //XXX This block is repeated, see above
958 g_stats.policyResults[appliedPolicy.d_kind]++;
959 switch(appliedPolicy.d_kind) {
960 case DNSFilterEngine::PolicyKind::NoAction:
961 break;
962 case DNSFilterEngine::PolicyKind::Drop:
963 g_stats.policyDrops++;
964 delete dc;
965 dc=0;
966 return;
967 case DNSFilterEngine::PolicyKind::NXDOMAIN:
968 ret.clear();
969 res=RCode::NXDomain;
970 goto haveAnswer;
971
972 case DNSFilterEngine::PolicyKind::NODATA:
973 ret.clear();
974 res=RCode::NoError;
975 goto haveAnswer;
976
977 case DNSFilterEngine::PolicyKind::Truncate:
978 if(!dc->d_tcp) {
979 ret.clear();
980 res=RCode::NoError;
981 pw.getHeader()->tc=1;
982 goto haveAnswer;
983 }
984 break;
985
986 case DNSFilterEngine::PolicyKind::Custom:
987 ret.clear();
988 res=RCode::NoError;
989 spoofed.d_name=dc->d_mdp.d_qname;
990 spoofed.d_type=appliedPolicy.d_custom->getType();
991 spoofed.d_ttl = appliedPolicy.d_ttl;
992 spoofed.d_class = 1;
993 spoofed.d_content = appliedPolicy.d_custom;
994 spoofed.d_place = DNSResourceRecord::ANSWER;
995 ret.push_back(spoofed);
53508135 996 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
997 goto haveAnswer;
998 }
644dd1da 999 }
4485aa35 1000 }
644dd1da 1001 haveAnswer:;
3e8216c8 1002 if(res == PolicyDecision::DROP) {
e9c2ad3a 1003 g_stats.policyDrops++;
ae7e77ad 1004 delete dc;
1005 dc=0;
1006 return;
3ddb9247 1007 }
3e8216c8 1008 if(tracedQuery || res == PolicyDecision::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1009 {
85ffbc53
PD
1010 string trace(sr.getTrace());
1011 if(!trace.empty()) {
1012 vector<string> lines;
1013 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1014 for(const string& line : lines) {
85ffbc53
PD
1015 if(!line.empty())
1016 L<<Logger::Warning<< line << endl;
1017 }
1018 }
1019 }
3ddb9247 1020
b3f0ed10 1021 if(res == PolicyDecision::PASS) { // XXX what does this MEAN? Why servfail on PASS?
0fe1d080
PD
1022 pw.getHeader()->rcode=RCode::ServFail;
1023 // no commit here, because no record
1024 g_stats.servFails++;
1025 }
288f4aa9 1026 else {
ea634573 1027 pw.getHeader()->rcode=res;
92011b8f 1028
f3fe4ae6 1029 // Does the validation mode or query demand validation?
9fc36e90 1030 if(!shouldNotValidate && (g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process))) {
b25cae9a 1031 try {
f3fe4ae6 1032 if(sr.doLog()) {
5fc44cd2 1033 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
2e921ec6 1034 }
b25cae9a 1035
4898a348
RG
1036 ResolveContext ctx;
1037#ifdef HAVE_PROTOBUF
1038 ctx.d_initialRequestId = dc->d_uuid;
1039#endif
1040 auto state=validateRecords(ctx, ret);
b25cae9a 1041 if(state == Secure) {
2e921ec6 1042 if(sr.doLog()) {
5fc44cd2 1043 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
2e921ec6 1044 }
b25cae9a 1045
1046 // Is the query source interested in the value of the ad-bit?
885c8881 1047 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1048 pw.getHeader()->ad=1;
1049 }
1050 else if(state == Insecure) {
f3fe4ae6 1051 if(sr.doLog()) {
5fc44cd2 1052 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
12ce523e 1053 }
b25cae9a 1054
1055 pw.getHeader()->ad=0;
f3fe4ae6 1056 }
b25cae9a 1057 else if(state == Bogus) {
c87e1876 1058 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
5fc44cd2 1059 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
b25cae9a 1060 }
1061
1062 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1063 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1064 if(sr.doLog()) {
5fc44cd2 1065 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1066 }
1067
1068 pw.getHeader()->rcode=RCode::ServFail;
1069 goto sendit;
1070 } else {
1071 if(sr.doLog()) {
5fc44cd2 1072 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1073 }
1074 }
1075 }
1076 }
1077 catch(ImmediateServFailException &e) {
1078 if(g_logCommonErrors)
5fc44cd2 1079 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1080 pw.getHeader()->rcode=RCode::ServFail;
1081 goto sendit;
f3fe4ae6 1082 }
b3f0ed10 1083 }
1084
c154c8a4 1085 if(ret.size()) {
92476c8b 1086 orderAndShuffle(ret);
ad42489c 1087 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
3e61e7f7 1088 sort(ret.begin(), ret.end(), *sl);
1089 variableAnswer=true;
1090 }
8e079f3a 1091 }
1092 if(haveEDNS) {
1093 ret.push_back(makeOpt(edo.d_packetsize, 0, edo.d_Z));
1094 }
0afa32d4
RG
1095
1096 bool needCommit = false;
8e079f3a 1097 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1098 if( ! DNSSECOK &&
1099 ( i->d_type == QType::NSEC3 ||
1100 (
1101 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1102 (
1103 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1104 i->d_place != DNSResourceRecord::ANSWER
1105 )
1106 )
1107 )
1108 ) {
2e921ec6 1109 continue;
3e80ebce
KM
1110 }
1111
8e079f3a 1112 pw.startRecord(i->d_name, i->d_type, i->d_ttl, i->d_class, i->d_place);
1113 if(i->d_type != QType::OPT) // their TTL ain't real
1114 minTTL = min(minTTL, i->d_ttl);
1115 i->d_content->toPacket(pw);
1116 if(pw.size() > maxanswersize) {
1117 pw.rollback();
1118 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
add935a2 1119 {
4957a608 1120 pw.getHeader()->tc=1;
add935a2
PD
1121 pw.truncate();
1122 }
8e079f3a 1123 goto sendit; // need to jump over pw.commit
1124 }
0afa32d4 1125 needCommit = true;
aa7929a3 1126#ifdef HAVE_PROTOBUF
d9d3f9c1
RG
1127 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1128 pbMessage.addRR(*i);
aa7929a3
RG
1129 }
1130#endif
ea634573 1131 }
0afa32d4 1132 if(needCommit)
8e079f3a 1133 pw.commit();
288f4aa9 1134 }
10321a98 1135 sendit:;
b3f0ed10 1136
79332bff 1137 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
92011b8f 1138 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
aa7929a3 1139#ifdef HAVE_PROTOBUF
b790ef3d 1140 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
d9d3f9c1
RG
1141 pbMessage.setBytes(packet.size());
1142 pbMessage.setResponseCode(pw.getHeader()->rcode);
0a273054
RG
1143 if (appliedPolicy.d_name) {
1144 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1145 }
d9d3f9c1 1146 pbMessage.setPolicyTags(dc->d_policyTags);
58307a85 1147 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
02b47f43 1148 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
aa7929a3
RG
1149 }
1150#endif
ea634573 1151 if(!dc->d_tcp) {
b71b60ee 1152 struct msghdr msgh;
1153 struct iovec iov;
1154 char cbuf[256];
1155 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1156 msgh.msg_control=NULL;
1157
cbc03320 1158 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1159 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1160 }
cbc03320 1161 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1162 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
3762e821 1163 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
e9f63d47 1164 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1165 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1166 g_now.tv_sec,
76e2b9e3 1167 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1
RG
1168 min(minTTL,SyncRes::s_packetcachettl),
1169 &pbMessage);
1051f8a9 1170 }
3762e821 1171 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1172 }
9c495589
BH
1173 else {
1174 char buf[2];
ea634573
BH
1175 buf[0]=packet.size()/256;
1176 buf[1]=packet.size()%256;
feccc9fc 1177
c038218b 1178 Utility::iovec iov[2];
feccc9fc 1179
ea634573
BH
1180 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1181 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1182
dd079764 1183 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1184 bool hadError=true;
feccc9fc 1185
dd079764 1186 if(wret == 0)
18af64a8 1187 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1188 else if(wret < 0 )
18af64a8 1189 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764
RG
1190 else if((unsigned int)wret != 2 + packet.size())
1191 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1192 else
18af64a8 1193 hadError=false;
3ddb9247 1194
09e6702a 1195 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1196
09e6702a 1197 if(hadError) {
18af64a8 1198 // no need to remove us from FDM, we weren't there
c36bc97a 1199 dc->d_socket = -1;
09e6702a 1200 }
a6ae6414 1201 else {
fde296a3
RG
1202 dc->d_tcpConnection->queriesCount++;
1203 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1204 dc->d_socket = -1;
1205 }
1206 else {
1207 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1208 Utility::gettimeofday(&g_now, 0); // needs to be updated
1209 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1210 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1211 }
0e9d9ce2 1212 }
9c495589 1213 }
3ddb9247 1214
1d5b3ce6 1215 if(!g_quiet) {
461df9d2 1216 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 1217 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
9de3e034 1218 sr.d_totUsec/1000.0<<" ms, "<<
1219 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e 1220 }
b23b8614 1221
3ddb9247 1222 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
fe213470
BH
1223 float spent=makeFloat(sr.d_now-dc->d_now);
1224 if(spent < 0.001)
1225 g_stats.answers0_1++;
1226 else if(spent < 0.010)
1227 g_stats.answers1_10++;
1228 else if(spent < 0.1)
1229 g_stats.answers10_100++;
1230 else if(spent < 1.0)
1231 g_stats.answers100_1000++;
1232 else
1233 g_stats.answersSlow++;
1234
574af7ea 1235 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1236 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1237 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1238 // no worries, we do this for packet cache hits elsewhere
c6d04bdc 1239 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 1240 delete dc;
c36bc97a 1241 dc=0;
288f4aa9 1242 }
3f81d239 1243 catch(PDNSException &ae) {
a903b39c 1244 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 1245 delete dc;
288f4aa9 1246 }
7b1469bb 1247 catch(MOADNSException& e) {
a903b39c 1248 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 1249 delete dc;
7b1469bb 1250 }
fdbf35ac 1251 catch(std::exception& e) {
068c7634
PD
1252 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1253
1254 // Luawrapper nests the exception from Lua, so we unnest it here
1255 try {
1256 std::rethrow_if_nested(e);
1257 } catch(const std::exception& e) {
1258 L<<". Extra info: "<<e.what();
1259 } catch(...) {}
1260
1261 L<<endl;
c36bc97a 1262 delete dc;
c154c8a4 1263 }
288f4aa9 1264 catch(...) {
a903b39c 1265 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1266 }
3ddb9247 1267
ec6eacbc 1268 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1269}
1270
d187038c 1271static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1272{
2d733c0f 1273 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1274 if(processNum >= 0)
335da0ba 1275 sockname += "."+std::to_string(processNum);
677e2a46 1276 sockname+=".controlsocket";
41f7a068 1277 s_rcc.listen(sockname);
3ddb9247 1278
387de317
BH
1279 int sockowner = -1;
1280 int sockgroup = -1;
1281
1282 if (!::arg().isEmpty("socket-group"))
1283 sockgroup=::arg().asGid("socket-group");
1284 if (!::arg().isEmpty("socket-owner"))
1285 sockowner=::arg().asUid("socket-owner");
3ddb9247 1286
f838ad8d
BH
1287 if (sockgroup > -1 || sockowner > -1) {
1288 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1289 unixDie("Failed to chown control socket");
1290 }
1291 }
387de317
BH
1292
1293 // do mode change if socket-mode is given
1294 if(!::arg().isEmpty("socket-mode")) {
1295 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1296 if(chmod(sockname.c_str(), sockmode) < 0) {
1297 unixDie("Failed to chmod control socket");
1298 }
387de317 1299 }
1d5b3ce6
BH
1300}
1301
b40562da 1302static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet)
02b47f43 1303{
b40562da 1304 bool found = false;
02b47f43
RG
1305 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1306 size_t questionLen = question.length();
1307 unsigned int consumed=0;
1308 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1309
1310 size_t pos= sizeof(dnsheader)+consumed+4;
1311 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1312 = 11 */
1313 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1314 /* OPT root label (1) followed by type (2) */
1315 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1316 char* ecsStart = nullptr;
1317 size_t ecsLen = 0;
1318 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1319 if (res == 0 && ecsLen > 4) {
1320 EDNSSubnetOpts eso;
1321 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
b40562da
RG
1322 *ednssubnet=eso;
1323 found = true;
02b47f43
RG
1324 }
1325 }
1326 }
1327 }
b40562da 1328 return found;
02b47f43
RG
1329}
1330
d187038c 1331static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1332{
cd989c87 1333 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1334
879b3f70 1335 if(conn->state==TCPConnection::BYTE0) {
b841314c 1336 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 1337 if(bytes==1)
667f7e60 1338 conn->state=TCPConnection::BYTE1;
3ddb9247 1339 if(bytes==2) {
a0aa4f64 1340 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
1341 conn->bytesread=0;
1342 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1343 }
1344 if(!bytes || bytes < 0) {
bb4bdbaf 1345 t_fdm->removeReadFD(fd);
09e6702a
BH
1346 return;
1347 }
1348 }
667f7e60 1349 else if(conn->state==TCPConnection::BYTE1) {
b841314c 1350 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 1351 if(bytes==1) {
667f7e60 1352 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1353 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 1354 conn->bytesread=0;
09e6702a
BH
1355 }
1356 if(!bytes || bytes < 0) {
1357 if(g_logCommonErrors)
cd989c87 1358 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
bb4bdbaf 1359 t_fdm->removeReadFD(fd);
09e6702a
BH
1360 return;
1361 }
1362 }
667f7e60 1363 else if(conn->state==TCPConnection::GETQUESTION) {
b841314c 1364 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
f9d67b41 1365 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
cd989c87 1366 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
bb4bdbaf 1367 t_fdm->removeReadFD(fd);
09e6702a
BH
1368 return;
1369 }
b841314c 1370 conn->bytesread+=(uint16_t)bytes;
667f7e60 1371 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1372 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1373
09e6702a
BH
1374 DNSComboWriter* dc=0;
1375 try {
cd989c87 1376 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
1377 }
1378 catch(MOADNSException &mde) {
3ddb9247 1379 g_stats.clientParseError++;
4957a608 1380 if(g_logCommonErrors)
cd989c87 1381 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
4957a608 1382 return;
09e6702a 1383 }
cd989c87
BH
1384 dc->d_tcpConnection = conn; // carry the torch
1385 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1386 dc->d_tcp=true;
cd989c87 1387 dc->setRemote(&conn->d_remote);
a6147cd2 1388 ComboAddress dest;
1389 memset(&dest, 0, sizeof(dest));
1390 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1391 socklen_t len = dest.getSocklen();
1392 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1393 dc->setLocal(dest);
33dcceba
RG
1394 DNSName qname;
1395 uint16_t qtype=0;
1396 uint16_t qclass=0;
1397 bool needECS = false;
aa7929a3 1398#ifdef HAVE_PROTOBUF
02b47f43 1399 auto luaconfsLocal = g_luaconfs.getLocal();
33dcceba
RG
1400 if (luaconfsLocal->protobufServer) {
1401 needECS = true;
1402 }
1403#endif
1404
1405 if(needECS || (t_pdl->get() && (*t_pdl)->d_gettag)) {
1406
1407 try {
b40562da
RG
1408 dc->d_ecsParsed = true;
1409 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet);
02b47f43 1410
33dcceba
RG
1411 if(t_pdl->get() && (*t_pdl)->d_gettag) {
1412 try {
b40562da 1413 dc->d_tag = (*t_pdl)->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data);
33dcceba
RG
1414 }
1415 catch(std::exception& e) {
1416 if(g_logCommonErrors)
1417 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1418 }
1419 }
1420 }
1421 catch(std::exception& e)
1422 {
1423 if(g_logCommonErrors)
1424 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1425 }
1426 }
1427#ifdef HAVE_PROTOBUF
4898a348 1428 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
02b47f43 1429 dc->d_uuid = (*t_uuidGenerator)();
4898a348 1430 }
02b47f43 1431
4898a348 1432 if(luaconfsLocal->protobufServer) {
02b47f43 1433 try {
02b47f43 1434 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
02b47f43 1435
b790ef3d 1436 if (!luaconfsLocal->protobufTaggedOnly) {
b40562da 1437 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags);
b790ef3d 1438 }
02b47f43
RG
1439 }
1440 catch(std::exception& e) {
1441 if(g_logCommonErrors)
1442 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1443 }
1444 }
aa7929a3 1445#endif
879b3f70 1446 if(dc->d_mdp.d_header.qr) {
4957a608 1447 delete dc;
048f5db6 1448 g_stats.ignoredCount++;
4328f463 1449 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
4957a608 1450 return;
879b3f70 1451 }
3abcdab2
PD
1452 if(dc->d_mdp.d_header.opcode) {
1453 delete dc;
048f5db6 1454 g_stats.ignoredCount++;
4328f463 1455 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
3abcdab2
PD
1456 return;
1457 }
09e6702a 1458 else {
4957a608
BH
1459 ++g_stats.qcounter;
1460 ++g_stats.tcpqcounter;
50a5ef72 1461 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1462 return;
09e6702a
BH
1463 }
1464 }
1465 }
1466}
1467
6dcd28c3 1468//! Handle new incoming TCP connection
d187038c 1469static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1470{
37d3f960 1471 ComboAddress addr;
09e6702a 1472 socklen_t addrlen=sizeof(addr);
a683e8bd 1473 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1474 if(newsock>=0) {
85c32340
BH
1475 if(MT->numProcesses() > g_maxMThreads) {
1476 g_stats.overCapacityDrops++;
a7b68ae7
RG
1477 try {
1478 closesocket(newsock);
1479 }
1480 catch(const PDNSException& e) {
1481 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1482 }
85c32340
BH
1483 return;
1484 }
1485
92011b8f 1486 if(t_remotes)
1487 t_remotes->push_back(addr);
49a699c4 1488 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1489 if(!g_quiet)
4957a608 1490 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1491
09e6702a 1492 g_stats.unauthorizedTCP++;
a7b68ae7
RG
1493 try {
1494 closesocket(newsock);
1495 }
1496 catch(const PDNSException& e) {
1497 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1498 }
09e6702a
BH
1499 return;
1500 }
bd0289fc 1501 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1502 g_stats.tcpClientOverflow++;
a7b68ae7
RG
1503 try {
1504 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1505 }
1506 catch(const PDNSException& e) {
1507 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1508 }
09e6702a
BH
1509 return;
1510 }
3ddb9247 1511
3897b9e1 1512 setNonBlocking(newsock);
cd989c87
BH
1513 shared_ptr<TCPConnection> tc(new TCPConnection(newsock, addr));
1514 tc->state=TCPConnection::BYTE0;
3ddb9247 1515
cd989c87 1516 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1517
0bff046b 1518 struct timeval now;
c038218b 1519 Utility::gettimeofday(&now, 0);
cd989c87 1520 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1521 }
1522}
3ddb9247 1523
d187038c 1524static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1525{
183eb877 1526 gettimeofday(&g_now, 0);
b71b60ee 1527 struct timeval diff = g_now - tv;
1528 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1529
22cf1fda 1530 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1531 g_stats.tooOldDrops++;
1532 return 0;
1533 }
1534
1bc3c142 1535 ++g_stats.qcounter;
d7f10541
BH
1536 if(fromaddr.sin4.sin_family==AF_INET6)
1537 g_stats.ipv6qcounter++;
1bc3c142
BH
1538
1539 string response;
93f0da94 1540 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 1541 unsigned int ctag=0;
f57486f1 1542 uint32_t qhash = 0;
12aff2e5 1543 bool needECS = false;
02b47f43 1544 std::vector<std::string> policyTags;
5fd2577f 1545 LuaContext::LuaObject data;
12aff2e5 1546#ifdef HAVE_PROTOBUF
02b47f43 1547 boost::uuids::uuid uniqueId;
02b47f43
RG
1548 auto luaconfsLocal = g_luaconfs.getLocal();
1549 if (luaconfsLocal->protobufServer) {
4898a348 1550 uniqueId = (*t_uuidGenerator)();
02b47f43 1551 needECS = true;
4898a348 1552 } else if (luaconfsLocal->outgoingProtobufServer) {
02b47f43
RG
1553 uniqueId = (*t_uuidGenerator)();
1554 }
12aff2e5 1555#endif
b40562da
RG
1556 EDNSSubnetOpts ednssubnet;
1557 bool ecsFound = false;
1558 bool ecsParsed = false;
1bc3c142 1559 try {
02b47f43
RG
1560 DNSName qname;
1561 uint16_t qtype=0;
1562 uint16_t qclass=0;
1bc3c142 1563 uint32_t age;
c15ff3df 1564 bool qnameParsed=false;
8f7473d7 1565#ifdef MALLOC_TRACE
1566 /*
1567 static uint64_t last=0;
1568 if(!last)
1569 g_mtracer->clearAllocators();
1570 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1571 last=g_mtracer->getAllocs();
1572 cout<<g_mtracer->topAllocatorsString()<<endl;
1573 g_mtracer->clearAllocators();
1574 */
1575#endif
55a1378f 1576
12aff2e5 1577 if(needECS || (t_pdl->get() && (*t_pdl)->d_gettag)) {
b2eacd67 1578 try {
b40562da 1579 ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet);
c15ff3df
RG
1580 qnameParsed = true;
1581 ecsParsed = true;
12aff2e5
RG
1582
1583 if(t_pdl->get() && (*t_pdl)->d_gettag) {
1584 try {
b40562da 1585 ctag=(*t_pdl)->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data);
12aff2e5
RG
1586 }
1587 catch(std::exception& e) {
1588 if(g_logCommonErrors)
1589 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1590 }
8ea8c302 1591 }
b2eacd67 1592 }
1593 catch(std::exception& e)
1594 {
1595 if(g_logCommonErrors)
1596 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 1597 }
12ce523e 1598 }
3ddb9247 1599
02b47f43 1600 bool cacheHit = false;
d9d3f9c1 1601 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
02b47f43
RG
1602#ifdef HAVE_PROTOBUF
1603 if(luaconfsLocal->protobufServer) {
b790ef3d 1604 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
b40562da 1605 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags);
b790ef3d 1606 }
d9d3f9c1
RG
1607 }
1608#endif /* HAVE_PROTOBUF */
02b47f43 1609
c15ff3df
RG
1610 if (qnameParsed) {
1611 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1612 }
1613 else {
1614 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1615 }
1616
d9d3f9c1
RG
1617 if (cacheHit) {
1618#ifdef HAVE_PROTOBUF
b790ef3d 1619 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
e1c8a4bb
RG
1620 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1621 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1622 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
b40562da 1623 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
58307a85 1624 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
02b47f43
RG
1625 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1626 }
d9d3f9c1 1627#endif /* HAVE_PROTOBUF */
49a3500d 1628 if(!g_quiet)
1629 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
8f7473d7 1630
1bc3c142
BH
1631 g_stats.packetCacheHits++;
1632 SyncRes::s_queries++;
1633 ageDNSPacket(response, age);
b71b60ee 1634 struct msghdr msgh;
1635 struct iovec iov;
1636 char cbuf[256];
1637 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
1638 msgh.msg_control=NULL;
1639
cbc03320 1640 if(g_fromtosockets.count(fd)) {
fbe2a2e0 1641 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 1642 }
cbc03320 1643 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1644 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 1645
97bee66d 1646 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
1647 struct dnsheader tmpdh;
1648 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1649 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
97bee66d 1650 }
08f3f638 1651 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1bc3c142
BH
1652 return 0;
1653 }
3ddb9247 1654 }
1bc3c142
BH
1655 catch(std::exception& e) {
1656 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1657 return 0;
1658 }
3ddb9247 1659
4ea94941 1660 if(t_pdl->get()) {
93f0da94 1661 if((*t_pdl)->ipfilter(fromaddr, destaddr, *dh)) {
4ea94941 1662 if(!g_quiet)
1663 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1664 g_stats.policyDrops++;
1665 return 0;
1666 }
1667 }
1668
1bc3c142 1669 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 1670 if(!g_quiet)
854d44e3 1671 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
461df9d2 1672
1bc3c142
BH
1673 g_stats.overCapacityDrops++;
1674 return 0;
1675 }
3ddb9247 1676
1bc3c142
BH
1677 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1678 dc->setSocket(fd);
49a3500d 1679 dc->d_tag=ctag;
e9f63d47 1680 dc->d_qhash=qhash;
49a3500d 1681 dc->d_query = question;
1bc3c142 1682 dc->setRemote(&fromaddr);
b71b60ee 1683 dc->setLocal(destaddr);
1bc3c142 1684 dc->d_tcp=false;
02b47f43 1685 dc->d_policyTags = policyTags;
05c74122 1686 dc->d_data = data;
b40562da
RG
1687 dc->d_ecsFound = ecsFound;
1688 dc->d_ecsParsed = ecsParsed;
1689 dc->d_ednssubnet = ednssubnet;
aa7929a3 1690#ifdef HAVE_PROTOBUF
4898a348 1691 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
d9d3f9c1
RG
1692 dc->d_uuid = uniqueId;
1693 }
aa7929a3
RG
1694#endif
1695
1bc3c142
BH
1696 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1697 return 0;
3ddb9247
PD
1698}
1699
b71b60ee 1700
d187038c 1701static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 1702{
a683e8bd 1703 ssize_t len;
5db529f8
BH
1704 char data[1500];
1705 ComboAddress fromaddr;
b71b60ee 1706 struct msghdr msgh;
1707 struct iovec iov;
1708 char cbuf[256];
1709
1710 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1711 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1712
3ddb9247 1713 for(;;)
b71b60ee 1714 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
92011b8f 1715 if(t_remotes)
1716 t_remotes->push_back(fromaddr);
b23b8614 1717
49a699c4 1718 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 1719 if(!g_quiet)
4957a608 1720 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1721
5db529f8 1722 g_stats.unauthorizedUDP++;
a9af3782 1723 return;
5db529f8 1724 }
15c01deb 1725 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 1726 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 1727 if(!g_quiet)
81859ba5 1728 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1729
1730 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1731 return;
1732 }
5db529f8 1733 try {
b23b8614 1734 dnsheader* dh=(dnsheader*)data;
3ddb9247 1735
b23b8614 1736 if(dh->qr) {
048f5db6 1737 g_stats.ignoredCount++;
4957a608
BH
1738 if(g_logCommonErrors)
1739 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 1740 }
3abcdab2 1741 else if(dh->opcode) {
048f5db6 1742 g_stats.ignoredCount++;
3abcdab2
PD
1743 if(g_logCommonErrors)
1744 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1745 }
5db529f8 1746 else {
a683e8bd 1747 string question(data, (size_t)len);
b71b60ee 1748 struct timeval tv={0,0};
1749 HarvestTimestamp(&msgh, &tv);
1750 ComboAddress dest;
c3cecd36 1751 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
a6147cd2 1752 auto loc = rplookup(g_listenSocketsAddresses, fd);
1753 if(HarvestDestinationAddress(&msgh, &dest)) {
1754 // but.. need to get port too
1755 if(loc)
1756 dest.sin4.sin_port = loc->sin4.sin_port;
1757 }
1758 else {
1759 if(loc) {
1760 dest = *loc;
1761 }
1762 else {
1763 dest.sin4.sin_family = fromaddr.sin4.sin_family;
a683e8bd
RG
1764 socklen_t slen = dest.getSocklen();
1765 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
a6147cd2 1766 }
1767 }
232f0877 1768 if(g_weDistributeQueries)
b71b60ee 1769 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
232f0877 1770 else
b71b60ee 1771 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
5db529f8
BH
1772 }
1773 }
1774 catch(MOADNSException& mde) {
3ddb9247 1775 g_stats.clientParseError++;
84e66a59 1776 if(g_logCommonErrors)
4957a608 1777 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 1778 }
0b602819
KM
1779 catch(std::runtime_error& e) {
1780 g_stats.clientParseError++;
1781 if(g_logCommonErrors)
1782 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
1783 }
1784 }
ac0e821b
BH
1785 else {
1786 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
3ddb9247 1787 if(errno == EAGAIN)
9326cae1 1788 g_stats.noPacketError++;
bf3b0cec 1789 break;
ac0e821b 1790 }
5db529f8
BH
1791}
1792
810ff705 1793static void makeTCPServerSockets(unsigned int threadId)
9c495589 1794{
37d3f960 1795 int fd;
f28307ad 1796 vector<string>locals;
2e3d8a19 1797 stringtok(locals,::arg()["local-address"]," ,");
9c495589 1798
f28307ad 1799 if(locals.empty())
3f81d239 1800 throw PDNSException("No local address specified");
3ddb9247 1801
f28307ad 1802 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1803 ServiceTuple st;
1804 st.port=::arg().asNum("local-port");
1805 parseService(*i, st);
3ddb9247 1806
32252594
BH
1807 ComboAddress sin;
1808
f28307ad 1809 memset((char *)&sin,0, sizeof(sin));
37d3f960 1810 sin.sin4.sin_family = AF_INET;
32252594 1811 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1812 sin.sin6.sin6_family = AF_INET6;
f71bc087 1813 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1814 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
1815 }
1816
1817 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 1818 if(fd<0)
3f81d239 1819 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 1820
3897b9e1 1821 setCloseOnExec(fd);
a903b39c 1822
f28307ad 1823 int tmp=1;
810ff705 1824 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
f28307ad 1825 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 1826 exit(1);
f28307ad 1827 }
0dfa94ab 1828 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1829 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1830 }
1831
c8ddb7c2 1832#ifdef TCP_DEFER_ACCEPT
810ff705 1833 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 1834 if(i==locals.begin())
4957a608 1835 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
1836 }
1837#endif
1838
fec7dd5a
SS
1839 if( ::arg().mustDo("non-local-bind") )
1840 Utility::setBindAny(AF_INET, fd);
1841
2332f42d 1842#ifdef SO_REUSEPORT
810ff705
RG
1843 if(g_reusePort) {
1844 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 1845 throw PDNSException("SO_REUSEPORT: "+stringerror());
1846 }
1847#endif
1848
0735b17e
RG
1849 if (::arg().asNum("tcp-fast-open") > 0) {
1850#ifdef TCP_FASTOPEN
1851 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1852 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1853 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1854 }
1855#else
1856 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1857#endif
1858 }
1859
32252594 1860 sin.sin4.sin_port = htons(st.port);
a683e8bd 1861 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 1862 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 1863 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 1864
3897b9e1 1865 setNonBlocking(fd);
49a699c4 1866 setSocketSendBuffer(fd, 65000);
37d3f960 1867 listen(fd, 128);
810ff705 1868 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 1869 g_tcpListenSockets.push_back(fd);
84433b79 1870 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1871 // - fd is not that which we know here, but returned from accept()
3ddb9247 1872 if(sin.sin4.sin_family == AF_INET)
32252594 1873 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1874 else
32252594 1875 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1876 }
9c495589
BH
1877}
1878
810ff705 1879static void makeUDPServerSockets(unsigned int threadId)
288f4aa9 1880{
fec7dd5a 1881 int one=1;
f28307ad 1882 vector<string>locals;
2e3d8a19 1883 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 1884
f28307ad 1885 if(locals.empty())
3f81d239 1886 throw PDNSException("No local address specified");
3ddb9247 1887
f28307ad 1888 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
1889 ServiceTuple st;
1890 st.port=::arg().asNum("local-port");
1891 parseService(*i, st);
1892
37d3f960 1893 ComboAddress sin;
996c89cc 1894
37d3f960
BH
1895 memset(&sin, 0, sizeof(sin));
1896 sin.sin4.sin_family = AF_INET;
32252594 1897 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 1898 sin.sin6.sin6_family = AF_INET6;
f71bc087 1899 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 1900 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 1901 }
3ddb9247 1902
bb4bdbaf 1903 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 1904 if(fd < 0) {
3f81d239 1905 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 1906 }
915b0c39
AT
1907 if (!setSocketTimestamps(fd))
1908 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 1909
b71b60ee 1910 if(IsAnyAddress(sin)) {
cbc03320 1911 if(sin.sin4.sin_family == AF_INET)
1912 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
1913 g_fromtosockets.insert(fd);
757d3179 1914#ifdef IPV6_RECVPKTINFO
cbc03320 1915 if(sin.sin4.sin_family == AF_INET6)
1916 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
1917 g_fromtosockets.insert(fd);
757d3179 1918#endif
0dfa94ab 1919 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
1920 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1921 }
b71b60ee 1922 }
fec7dd5a
SS
1923 if( ::arg().mustDo("non-local-bind") )
1924 Utility::setBindAny(AF_INET6, fd);
1925
3897b9e1 1926 setCloseOnExec(fd);
a903b39c 1927
4e9a20e6 1928 setSocketReceiveBuffer(fd, 250000);
32252594 1929 sin.sin4.sin_port = htons(st.port);
37d3f960 1930
2332f42d 1931
1932#ifdef SO_REUSEPORT
810ff705 1933 if(g_reusePort) {
2332f42d 1934 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
1935 throw PDNSException("SO_REUSEPORT: "+stringerror());
1936 }
1937#endif
a683e8bd 1938 socklen_t socklen=sin.getSocklen();
3ddb9247 1939 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 1940 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 1941
3897b9e1 1942 setNonBlocking(fd);
c2136bf0 1943
810ff705 1944 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 1945 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 1946 if(sin.sin4.sin_family == AF_INET)
32252594 1947 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 1948 else
32252594 1949 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 1950 }
c836dc19 1951}
caa6eefa 1952
d187038c 1953static void daemonize(void)
c836dc19
BH
1954{
1955 if(fork())
1956 exit(0); // bye bye
3ddb9247
PD
1957
1958 setsid();
c836dc19 1959
27a5ead5 1960 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 1961 if(i < 0)
27a5ead5
BH
1962 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1963 else {
1964 dup2(i,0); /* stdin */
1965 dup2(i,1); /* stderr */
1966 dup2(i,2); /* stderr */
1967 close(i);
1968 }
288f4aa9 1969}
caa6eefa 1970
d187038c 1971static void usr1Handler(int)
c75a6a9e
BH
1972{
1973 statsWanted=true;
1974}
ae1b2e98 1975
d187038c 1976static void usr2Handler(int)
9170fbaf 1977{
f1f34cc2 1978 g_quiet= !g_quiet;
1979 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
1980 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
1981}
1982
d187038c 1983static void doStats(void)
c75a6a9e 1984{
16beeaa4
BH
1985 static time_t lastOutputTime;
1986 static uint64_t lastQueryCount;
d299d4f5 1987
1988 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
1989 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 1990
d299d4f5 1991 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
bd301954 1992 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
1993 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
1994 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
1995 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
1996
bd301954 1997 L<<Logger::Notice<<"stats: throttle map: "
3427fa8a 1998 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 1999 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
bd301954
JB
2000 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2001 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2002 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
bd301954 2003 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2004 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2005
bd301954 2006 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2007 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2008
bd301954 2009 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2010 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2011
16beeaa4
BH
2012 time_t now = time(0);
2013 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
bd301954 2014 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2015 }
2016 lastOutputTime = now;
2017 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2018 }
3ddb9247 2019 else if(statsWanted)
bd301954 2020 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2021
c75a6a9e
BH
2022 statsWanted=false;
2023}
c836dc19 2024
29f0b1ce 2025static void houseKeeping(void *)
c836dc19 2026{
d67620e4 2027 static __thread time_t last_stat, last_rootupdate, last_prune, last_secpoll;
8baca3fa 2028 static __thread int cleanCounter=0;
cc59bce6 2029 static __thread bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2030 try {
2031 if(s_running)
2032 return;
2033 s_running=true;
3ddb9247 2034
cc59bce6 2035 struct timeval now;
2036 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2037
2038 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 2039 DTime dt;
2040 dt.setTimeval(now);
2041 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
f8f243b0 2042 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numWorkerThreads);
3ddb9247 2043
f8f243b0 2044 pruneCollection(t_sstorage->negcache, ::arg().asNum("max-cache-entries") / (g_numWorkerThreads * 10), 200);
3ddb9247 2045
cc59bce6 2046 if(!((cleanCounter++)%40)) { // this is a full scan!
2047 time_t limit=now.tv_sec-300;
2048 for(SyncRes::nsspeeds_t::iterator i = t_sstorage->nsSpeeds.begin() ; i!= t_sstorage->nsSpeeds.end(); )
2049 if(i->second.stale(limit))
2050 t_sstorage->nsSpeeds.erase(i++);
2051 else
2052 ++i;
2053 }
2054 last_prune=time(0);
d67620e4 2055 }
3ddb9247 2056
cc59bce6 2057 if(now.tv_sec - last_rootupdate > 7200) {
7836f7b4
PL
2058 int res = getRootNS();
2059 if (!res)
2060 last_rootupdate=now.tv_sec;
cc59bce6 2061 }
3ddb9247 2062
cc59bce6 2063 if(!t_id) {
3ddb9247 2064 if(now.tv_sec - last_stat >= 1800) {
cc59bce6 2065 doStats();
2066 last_stat=time(0);
2067 }
3ddb9247 2068
cc59bce6 2069 if(now.tv_sec - last_secpoll >= 3600) {
2070 try {
2071 doSecPoll(&last_secpoll);
2072 }
2073 catch(...) {}
18b73338 2074 }
d67620e4 2075 }
cc59bce6 2076 s_running=false;
d67620e4 2077 }
cc59bce6 2078 catch(PDNSException& ae)
2079 {
2080 s_running=false;
2081 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2082 throw;
2083 }
779828c4 2084}
d6d5dea7 2085
d187038c 2086static void makeThreadPipes()
49a699c4 2087{
c3828c03 2088 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
2089 struct ThreadPipeSet tps;
2090 int fd[2];
2091 if(pipe(fd) < 0)
2092 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2093
49a699c4
BH
2094 tps.readToThread = fd[0];
2095 tps.writeToThread = fd[1];
3ddb9247 2096
49a699c4
BH
2097 if(pipe(fd) < 0)
2098 unixDie("Creating pipe for inter-thread communications");
2099 tps.readFromThread = fd[0];
2100 tps.writeFromThread = fd[1];
3ddb9247 2101
49a699c4
BH
2102 g_pipes.push_back(tps);
2103 }
2104}
2105
00c9b8c1
BH
2106struct ThreadMSG
2107{
2108 pipefunc_t func;
2109 bool wantAnswer;
2110};
2111
49a699c4
BH
2112void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2113{
49a699c4 2114 unsigned int n = 0;
1dc8f4d0 2115 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
2116 {
2117 if(n++ == t_id) {
2118 if(!skipSelf)
2119 func(); // don't write to ourselves!
2120 continue;
2121 }
3ddb9247 2122
00c9b8c1
BH
2123 ThreadMSG* tmsg = new ThreadMSG();
2124 tmsg->func = func;
2125 tmsg->wantAnswer = true;
b841314c
RG
2126 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2127 delete tmsg;
49a699c4 2128 unixDie("write to thread pipe returned wrong size or error");
b841314c 2129 }
3ddb9247 2130
49a699c4
BH
2131 string* resp;
2132 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2133 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2134
49a699c4
BH
2135 if(resp) {
2136// cerr <<"got response: " << *resp << endl;
2137 delete resp;
2138 }
2139 }
2140}
06ea9015 2141
8171ab83 2142void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 2143{
8171ab83 2144 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 2145 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2146
00c9b8c1
BH
2147 if(target == t_id) {
2148 func();
2149 return;
2150 }
3ddb9247 2151 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
2152 ThreadMSG* tmsg = new ThreadMSG();
2153 tmsg->func = func;
2154 tmsg->wantAnswer = false;
3ddb9247 2155
b841314c
RG
2156 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2157 delete tmsg;
3ddb9247 2158 unixDie("write to thread pipe returned wrong size or error");
b841314c 2159 }
00c9b8c1 2160}
3427fa8a 2161
d187038c 2162static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2163{
00c9b8c1 2164 ThreadMSG* tmsg;
3ddb9247
PD
2165
2166 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
49a699c4
BH
2167 unixDie("read from thread pipe returned wrong size or error");
2168 }
3ddb9247 2169
2f22827a 2170 void *resp=0;
2171 try {
2172 resp = tmsg->func();
2173 }
2174 catch(std::exception& e) {
6d2010a8 2175 if(g_logCommonErrors)
2176 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2177 }
2178 catch(PDNSException& e) {
6d2010a8 2179 if(g_logCommonErrors)
2180 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2181 }
d7c676a5
RG
2182 if(tmsg->wantAnswer) {
2183 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2184 delete tmsg;
00c9b8c1 2185 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
2186 }
2187 }
3ddb9247 2188
00c9b8c1 2189 delete tmsg;
49a699c4 2190}
09e6702a 2191
13034931
BH
2192template<class T> void *voider(const boost::function<T*()>& func)
2193{
2194 return func();
2195}
2196
b3b5459d
BH
2197vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2198{
2199 a.insert(a.end(), b.begin(), b.end());
2200 return a;
2201}
2202
// Appends all elements of b to a, so per-thread (name, type) lists can be accumulated with +=.
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  for(const std::pair<std::string, uint16_t>& entry : b) {
    a.push_back(entry);
  }
  return a;
}
2208
3ddb9247
PD
2209vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2210{
2211 a.insert(a.end(), b.begin(), b.end());
2212 return a;
2213}
2214
92011b8f 2215
13034931 2216template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
3427fa8a
BH
2217{
2218 unsigned int n = 0;
2219 T ret=T();
1dc8f4d0 2220 for(ThreadPipeSet& tps : g_pipes)
3427fa8a
BH
2221 {
2222 if(n++ == t_id) {
2223 if(!skipSelf) {
2224 T* resp = (T*)func(); // don't write to ourselves!
2225 if(resp) {
2226 //~ cerr <<"got direct: " << *resp << endl;
2227 ret += *resp;
2228 delete resp;
2229 }
2230 }
2231 continue;
2232 }
3ddb9247 2233
00c9b8c1
BH
2234 ThreadMSG* tmsg = new ThreadMSG();
2235 tmsg->func = boost::bind(voider<T>, func);
2236 tmsg->wantAnswer = true;
3ddb9247 2237
b841314c
RG
2238 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2239 delete tmsg;
3427fa8a 2240 unixDie("write to thread pipe returned wrong size or error");
b841314c 2241 }
3ddb9247 2242
3427fa8a
BH
2243 T* resp;
2244 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2245 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2246
3427fa8a
BH
2247 if(resp) {
2248 //~ cerr <<"got response: " << *resp << endl;
2249 ret += *resp;
2250 delete resp;
2251 }
2252 }
2253 return ret;
2254}
2255
13034931
BH
2256template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2257template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
b3b5459d 2258template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
3ddb9247 2259template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
3427fa8a 2260
d187038c 2261static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2262{
2263 string remote;
2264 string msg=s_rcc.recv(&remote);
2265 RecursorControlParser rcp;
2266 RecursorControlParser::func_t* command;
3ddb9247 2267
09e6702a 2268 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2269
2270 // If we are inside a chroot, we need to strip
2271 if (!arg()["chroot"].empty()) {
a683e8bd 2272 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2273 remote = remote.substr(len);
2274 }
2275
ab5c053d
BH
2276 try {
2277 s_rcc.send(answer, &remote);
2278 command();
2279 }
fdbf35ac 2280 catch(std::exception& e) {
ab5c053d
BH
2281 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2282 }
3f81d239 2283 catch(PDNSException& ae) {
ab5c053d
BH
2284 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2285 }
09e6702a
BH
2286}
2287
d187038c 2288static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2289{
0b18b22e 2290 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2291 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2292
667f7e60 2293 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2294
a683e8bd 2295 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2296 if(ret > 0) {
667f7e60 2297 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2298 pident->inNeeded-=(size_t)ret;
825fa717 2299 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2300 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2301 PacketID pid=*pident;
2302 string msg=pident->inMSG;
3ddb9247 2303
bb4bdbaf 2304 t_fdm->removeReadFD(fd);
3ddb9247 2305 MT->sendEvent(pid, &msg);
09e6702a
BH
2306 }
2307 else {
667f7e60 2308 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2309 }
2310 }
2311 else {
667f7e60 2312 PacketID tmp=*pident;
bb4bdbaf 2313 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2314 string empty;
2315 MT->sendEvent(tmp, &empty); // this conveys error status
2316 }
2317}
2318
d187038c 2319static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2320{
0b18b22e 2321 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 2322 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 2323 if(ret > 0) {
a683e8bd 2324 pid->outPos+=(ssize_t)ret;
667f7e60
BH
2325 if(pid->outPos==pid->outMSG.size()) {
2326 PacketID tmp=*pid;
bb4bdbaf 2327 t_fdm->removeWriteFD(fd);
09e6702a
BH
2328 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2329 }
2330 }
2331 else { // error or EOF
667f7e60 2332 PacketID tmp(*pid);
bb4bdbaf 2333 t_fdm->removeWriteFD(fd);
09e6702a 2334 string sent;
998a4334 2335 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
2336 }
2337}
2338
34801ab1 2339// resend event to everybody chained onto it
d187038c 2340static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
2341{
2342 if(iter->key.chain.empty())
2343 return;
e27e91a8 2344 // cerr<<"doResends called!\n";
34801ab1
BH
2345 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2346 resend.fd=-1;
2347 resend.id=*i;
e27e91a8 2348 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 2349
34801ab1
BH
2350 MT->sendEvent(resend, &content);
2351 g_stats.chainResends++;
34801ab1
BH
2352 }
2353}
2354
d187038c 2355static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2356{
600fc20b 2357 PacketID pid=any_cast<PacketID>(var);
a683e8bd 2358 ssize_t len;
e45beeda 2359 char data[g_outgoingEDNSBufsize];
996c89cc 2360 ComboAddress fromaddr;
09e6702a
BH
2361 socklen_t addrlen=sizeof(fromaddr);
2362
998a4334 2363 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 2364
a683e8bd 2365 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 2366 if(len < 0)
996c89cc 2367 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 2368 else {
3ddb9247 2369 g_stats.serverParseError++;
09e6702a 2370 if(g_logCommonErrors)
85db02c5 2371 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 2372 ": packet smaller than DNS header"<<endl;
998a4334 2373 }
34801ab1 2374
49a699c4 2375 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
2376 string empty;
2377
2378 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 2379 if(iter != MT->d_waiters.end())
34801ab1 2380 doResends(iter, pid, empty);
3ddb9247 2381
34801ab1 2382 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 2383 return;
3ddb9247 2384 }
998a4334
BH
2385
2386 dnsheader dh;
2387 memcpy(&dh, data, sizeof(dh));
3ddb9247 2388
6da3b3ad
PD
2389 PacketID pident;
2390 pident.remote=fromaddr;
2391 pident.id=dh.id;
2392 pident.fd=fd;
34801ab1 2393
33a928af 2394 if(!dh.qr && g_logCommonErrors) {
854d44e3 2395 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
2396 }
2397
2398 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2399 !dh.qr) { // one weird server
2400 pident.domain.clear();
2401 pident.type = 0;
2402 }
2403 else {
2404 try {
0b31e67e 2405 if(len > 12)
2406 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
2407 }
2408 catch(std::exception& e) {
2409 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
0b31e67e 2410 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 2411 return;
34801ab1 2412 }
6da3b3ad
PD
2413 }
2414 string packet;
2415 packet.assign(data, len);
34801ab1 2416
6da3b3ad
PD
2417 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2418 if(iter != MT->d_waiters.end()) {
2419 doResends(iter, pident, packet);
2420 }
c1da7976 2421
6da3b3ad 2422retryWithName:
4957a608 2423
6da3b3ad
PD
2424 if(!MT->sendEvent(pident, &packet)) {
2425 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2426 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2427 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 2428 pident.domain == mthread->key.domain) {
6da3b3ad 2429 mthread->key.nearMisses++;
998a4334 2430 }
6da3b3ad
PD
2431
2432 // be a bit paranoid here since we're weakening our matching
3ddb9247 2433 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
2434 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2435 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2436 pident.domain = mthread->key.domain;
2437 pident.type = mthread->key.type;
2438 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 2439 }
09e6702a 2440 }
6da3b3ad
PD
2441 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2442 if(g_logCommonErrors) {
8a464ee3 2443 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 2444 }
09e6702a 2445 }
6da3b3ad
PD
2446 else if(fd >= 0) {
2447 t_udpclientsocks->returnSocket(fd);
2448 }
09e6702a
BH
2449}
2450
1f4abb20
BH
2451FDMultiplexer* getMultiplexer()
2452{
2453 FDMultiplexer* ret;
2454 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
2455 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
2456 try {
2457 ret=i->second();
1f4abb20
BH
2458 return ret;
2459 }
98d0ee4a 2460 catch(FDMultiplexerException &fe) {
0a7f24cb 2461 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
2462 }
2463 catch(...) {
2464 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2465 }
1f4abb20
BH
2466 }
2467 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2468 exit(1);
2469}
2470
3ddb9247 2471
d187038c 2472static string* doReloadLuaScript()
4485aa35 2473{
674cf0f6 2474 string fname= ::arg()["lua-dns-script"];
4485aa35 2475 try {
674cf0f6
BH
2476 if(fname.empty()) {
2477 t_pdl->reset();
2478 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 2479 return new string("unloaded\n");
4485aa35
BH
2480 }
2481 else {
a3e7b735 2482 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(fname));
4485aa35
BH
2483 }
2484 }
fdbf35ac 2485 catch(std::exception& e) {
674cf0f6 2486 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 2487 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 2488 }
3ddb9247 2489
674cf0f6 2490 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 2491 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
2492}
2493
49a699c4
BH
2494string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2495{
3ddb9247 2496 if(begin != end)
49a699c4 2497 ::arg().set("lua-dns-script") = *begin;
3ddb9247 2498
0f39c1a3 2499 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 2500}
49a699c4 2501
d187038c 2502static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
2503try
2504{
2505 if(newRegex.empty()) {
2506 t_traceRegex->reset();
2507 return new string("unset\n");
2508 }
2509 else {
2510 (*t_traceRegex) = shared_ptr<Regex>(new Regex(newRegex));
2511 return new string("ok\n");
2512 }
2513}
3f81d239 2514catch(PDNSException& ae)
77499b05
BH
2515{
2516 return new string(ae.reason+"\n");
2517}
2518
2519string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2520{
2521 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2522}
2523
4e9a20e6 2524static void checkLinuxIPv6Limits()
2525{
2526#ifdef __linux__
2527 string line;
2528 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 2529 int lim=std::stoi(line);
4e9a20e6 2530 if(lim < 16384) {
36849ff2 2531 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 2532 }
2533 }
2534#endif
2535}
36849ff2 2536static void checkOrFixFDS()
4e9a20e6 2537{
c0063e60 2538 unsigned int availFDs=getFilenumLimit();
2539 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2540
2541 if(wantFDs > availFDs) {
067ad20e 2542 unsigned int hardlimit= getFilenumLimit(true);
2543 if(hardlimit >= wantFDs) {
c0063e60 2544 setFilenumLimit(wantFDs);
2545 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 2546 }
2547 else {
067ad20e 2548 int newval = (hardlimit - 25) / g_numWorkerThreads;
2549 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 2550 g_maxMThreads = newval;
067ad20e 2551 setFilenumLimit(hardlimit);
36849ff2 2552 }
2553 }
4e9a20e6 2554}
77499b05 2555
d187038c 2556static void* recursorThread(void*);
51e2144e 2557
d187038c 2558static void* pleaseSupplantACLs(NetmaskGroup *ng)
49a699c4
BH
2559{
2560 t_allowFrom = ng;
3427fa8a 2561 return 0;
49a699c4
BH
2562}
2563
dbd23fc2
BH
2564int g_argc;
2565char** g_argv;
2566
18af64a8 2567void parseACLs()
f7c1d4e3 2568{
18af64a8 2569 static bool l_initialized;
3ddb9247 2570
49a699c4 2571 if(l_initialized) { // only reload configuration file on second call
18af64a8
BH
2572 string configname=::arg()["config-dir"]+"/recursor.conf";
2573 cleanSlashes(configname);
3ddb9247
PD
2574
2575 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 2576 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 2577 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 2578 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
2579 ::arg().preParse(g_argc, g_argv, "include-dir");
2580
2581 // then process includes
2582 std::vector<std::string> extraConfigs;
242b90e1
AT
2583 ::arg().gatherIncludes(extraConfigs);
2584
1dc8f4d0 2585 for(const std::string& fn : extraConfigs) {
7e818521 2586 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2587 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2588 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2589 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2590 }
ca2c884c
AT
2591
2592 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2593 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2594 }
49a699c4
BH
2595
2596 NetmaskGroup* oldAllowFrom = t_allowFrom, *allowFrom=new NetmaskGroup;
3ddb9247 2597
2c95fc65
BH
2598 if(!::arg()["allow-from-file"].empty()) {
2599 string line;
2c95fc65
BH
2600 ifstream ifs(::arg()["allow-from-file"].c_str());
2601 if(!ifs) {
3ddb9247 2602 delete allowFrom;
9c61b9d0 2603 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2604 }
2605
2606 string::size_type pos;
2607 while(getline(ifs,line)) {
2608 pos=line.find('#');
2609 if(pos!=string::npos)
2610 line.resize(pos);
2611 trim(line);
2612 if(line.empty())
2613 continue;
2614
18af64a8 2615 allowFrom->addMask(line);
2c95fc65 2616 }
49a699c4 2617 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2618 }
2619 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2620 vector<string> ips;
2621 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2622
f7c1d4e3
BH
2623 L<<Logger::Warning<<"Only allowing queries from: ";
2624 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2625 allowFrom->addMask(*i);
f7c1d4e3 2626 if(i!=ips.begin())
674cf0f6 2627 L<<Logger::Warning<<", ";
f7c1d4e3
BH
2628 L<<Logger::Warning<<*i;
2629 }
2630 L<<Logger::Warning<<endl;
2631 }
49a699c4 2632 else {
3ddb9247 2633 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
49a699c4
BH
2634 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2635 delete allowFrom;
2636 allowFrom = 0;
2637 }
3ddb9247 2638
49a699c4 2639 g_initialAllowFrom = allowFrom;
d7dae798 2640 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
49a699c4 2641 delete oldAllowFrom;
3ddb9247 2642
49a699c4 2643 l_initialized = true;
18af64a8
BH
2644}
2645
795215f2 2646
756e82cf 2647static void setupDelegationOnly()
2648{
2649 vector<string> parts;
2650 stringtok(parts, ::arg()["delegation-only"], ", \t");
2651 for(const auto& p : parts) {
9ea28e46 2652 g_delegationOnly.insert(DNSName(p));
756e82cf 2653 }
2654}
795215f2 2655
d187038c 2656static int serviceMain(int argc, char*argv[])
18af64a8 2657{
5124de27 2658 L.setName(s_programname);
18af64a8 2659 L.setLoglevel((Logger::Urgency)(6)); // info and up
b6cfa948 2660 L.disableSyslog(::arg().mustDo("disable-syslog"));
18af64a8
BH
2661
2662 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
2663 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2664 if(val >= 0)
2665 theL().setFacility(val);
18af64a8
BH
2666 else
2667 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2668 }
2669
ba1a571d 2670 showProductVersion();
18af64a8 2671 seedRandom(::arg()["entropy-source"]);
3afde9b2 2672
06ea9015 2673 g_disthashseed=dns_random(0xffffffff);
2674
b7ef5828
PL
2675 checkLinuxIPv6Limits();
2676 try {
2677 vector<string> addrs;
2678 if(!::arg()["query-local-address6"].empty()) {
2679 SyncRes::s_doIPv6=true;
2680 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2681
2682 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2683 for(const string& addr : addrs) {
2684 g_localQueryAddresses6.push_back(ComboAddress(addr));
2685 }
2686 }
2687 else {
2688 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2689 }
2690 addrs.clear();
2691 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2692 for(const string& addr : addrs) {
2693 g_localQueryAddresses4.push_back(ComboAddress(addr));
2694 }
2695 }
2696 catch(std::exception& e) {
2697 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2698 exit(99);
2699 }
2700
e48c6b8a
PL
2701 // keep this ABOVE loadRecursorLuaConfig!
2702 if(::arg()["dnssec"]=="off")
2703 g_dnssecmode=DNSSECMode::Off;
2704 else if(::arg()["dnssec"]=="process-no-validate")
2705 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2706 else if(::arg()["dnssec"]=="process")
2707 g_dnssecmode=DNSSECMode::Process;
2708 else if(::arg()["dnssec"]=="validate")
2709 g_dnssecmode=DNSSECMode::ValidateAll;
2710 else if(::arg()["dnssec"]=="log-fail")
2711 g_dnssecmode=DNSSECMode::ValidateForLog;
2712 else {
2713 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2714 exit(1);
2715 }
2716
2717 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2718
0f5785a6
PL
2719 try {
2720 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2721 }
2722 catch (PDNSException &e) {
2723 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2724 exit(1);
2725 }
ad42489c 2726
18af64a8 2727 parseACLs();
92011b8f 2728 sortPublicSuffixList();
2729
eb5bae86
BH
2730 if(!::arg()["dont-query"].empty()) {
2731 g_dontQuery=new NetmaskGroup;
2732 vector<string> ips;
2733 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
2734 ips.push_back("0.0.0.0");
2735 ips.push_back("::");
c36bc97a 2736
eb5bae86
BH
2737 L<<Logger::Warning<<"Will not send queries to: ";
2738 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2739 g_dontQuery->addMask(*i);
2740 if(i!=ips.begin())
4957a608 2741 L<<Logger::Warning<<", ";
eb5bae86
BH
2742 L<<Logger::Warning<<*i;
2743 }
2744 L<<Logger::Warning<<endl;
2745 }
2746
f7c1d4e3 2747 g_quiet=::arg().mustDo("quiet");
3ddb9247 2748
1bc3c142
BH
2749 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2750 if(g_weDistributeQueries) {
2751 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2752 }
3ddb9247 2753
756e82cf 2754 setupDelegationOnly();
b33c2462 2755 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 2756
77499b05
BH
2757 if(::arg()["trace"]=="fail") {
2758 SyncRes::setDefaultLogMode(SyncRes::Store);
2759 }
2760 else if(::arg().mustDo("trace")) {
2761 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
2762 ::arg().set("quiet")="no";
2763 g_quiet=false;
3e9c6c0a 2764 g_dnssecLOG=true;
f7c1d4e3 2765 }
3ddb9247 2766
aadceba8 2767 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2768
1051f8a9
BH
2769 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2770
f7c1d4e3 2771 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
63637fd8 2772 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 2773 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
2774 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2775 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2776 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
2777 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2778 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 2779 SyncRes::s_serverID=::arg()["server-id"];
173d790e 2780 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 2781 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 2782 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 2783 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
2784 if(SyncRes::s_serverID.empty()) {
2785 char tmp[128];
2786 gethostname(tmp, sizeof(tmp)-1);
2787 SyncRes::s_serverID=tmp;
2788 }
3ddb9247 2789
5b0ddd18 2790 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 2791
49a699c4 2792 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 2793
08f3f638 2794 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 2795
f7c1d4e3 2796 g_logCommonErrors=::arg().mustDo("log-common-errors");
e661a20b
PD
2797
2798 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
2799 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2800
b3adda56
PD
2801 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
2802
810ff705
RG
2803 g_numWorkerThreads = ::arg().asNum("threads");
2804 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
2805 g_maxMThreads = ::arg().asNum("max-mthreads");
2806
2807#ifdef SO_REUSEPORT
2808 g_reusePort = ::arg().mustDo("reuseport");
2809#endif
2810
2811 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
2812
2813 if (g_useOneSocketPerThread) {
2814 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
2815 makeUDPServerSockets(threadId);
2816 makeTCPServerSockets(threadId);
2817 }
2818 }
2819 else {
2820 makeUDPServerSockets(0);
2821 makeTCPServerSockets(0);
2822 }
815099b2 2823
376effcf 2824 parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
b40562da 2825 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
376effcf 2826
677e2a46
BH
2827 int forks;
2828 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
2829 if(!fork()) // we are child
2830 break;
2831 }
3ddb9247 2832
f7c1d4e3
BH
2833 if(::arg().mustDo("daemon")) {
2834 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
2835 L.toConsole(Logger::Critical);
f7c1d4e3 2836 daemonize();
a4241908 2837 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
f7c1d4e3
BH
2838 }
2839 signal(SIGUSR1,usr1Handler);
2840 signal(SIGUSR2,usr2Handler);
2841 signal(SIGPIPE,SIG_IGN);
810ff705 2842
a6414fdc 2843 checkOrFixFDS();
3ddb9247 2844
d1b28475
KM
2845#ifdef HAVE_LIBSODIUM
2846 if (sodium_init() == -1) {
2847 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
2848 exit(99);
2849 }
2850#endif
2851
3afde9b2
PL
2852 openssl_thread_setup();
2853 openssl_seed();
2854
138435cb
BH
2855 int newgid=0;
2856 if(!::arg()["setgid"].empty())
2857 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
2858 int newuid=0;
2859 if(!::arg()["setuid"].empty())
2860 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
2861
f1d6a7ce
KM
2862 Utility::dropGroupPrivs(newuid, newgid);
2863
138435cb 2864 if (!::arg()["chroot"].empty()) {
75336810
PL
2865#ifdef HAVE_SYSTEMD
2866 char *ns;
2867 ns = getenv("NOTIFY_SOCKET");
2868 if (ns != nullptr) {
2869 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
2870 exit(1);
2871 }
2872#endif
138435cb
BH
2873 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
2874 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
2875 exit(1);
2876 }
f0f3f0b0
PL
2877 else
2878 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
2879 }
2880
f0f3f0b0
PL
2881 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
2882 if(!s_pidfname.empty())
2883 unlink(s_pidfname.c_str()); // remove possible old pid file
2884 writePid();
2885
2886 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
2887
f1d6a7ce 2888 Utility::dropUserPrivs(newuid);
c0063e60 2889
49a699c4 2890 makeThreadPipes();
3ddb9247 2891
5d4dd7fe
BH
2892 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2893 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 2894 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
343257a4 2895
d705aad9
RG
2896 if (::arg().mustDo("snmp-agent")) {
2897 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
2898 g_snmpAgent->run();
2899 }
2900
c3828c03 2901 if(g_numThreads == 1) {
76698c6e 2902 L<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
2903#ifdef HAVE_SYSTEMD
2904 sd_notify(0, "READY=1");
2905#endif
76698c6e
BH
2906 recursorThread(0);
2907 }
2908 else {
2909 pthread_t tid;
c3828c03
BH
2910 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
2911 for(unsigned int n=0; n < g_numThreads; ++n) {
77499b05 2912 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
76698c6e
BH
2913 }
2914 void* res;
6b6720de
PL
2915#ifdef HAVE_SYSTEMD
2916 sd_notify(0, "READY=1");
2917#endif
76698c6e 2918 pthread_join(tid, &res);
bb4bdbaf 2919 }
bb4bdbaf
BH
2920 return 0;
2921}
2922
d187038c 2923static void* recursorThread(void* ptr)
bb4bdbaf
BH
2924try
2925{
2e2cd8ec 2926 t_id=(int) (long) ptr;
49a699c4 2927 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
ac0e821b 2928 t_sstorage->domainmap = g_initialDomainMap;
49a699c4
BH
2929 t_allowFrom = g_initialAllowFrom;
2930 t_udpclientsocks = new UDPClientSocks();
bd0289fc 2931 t_tcpClientCounts = new tcpClientCounts_t();
49a699c4 2932 primeHints();
3ddb9247 2933
49a699c4 2934 t_packetCache = new RecursorPacketCache();
3ddb9247 2935
aa7929a3
RG
2936#ifdef HAVE_PROTOBUF
2937 t_uuidGenerator = new boost::uuids::random_generator();
2938#endif
49a699c4 2939 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 2940
a3e7b735 2941 t_pdl = new shared_ptr<RecursorLua4>();
3ddb9247 2942
674cf0f6
BH
2943 try {
2944 if(!::arg()["lua-dns-script"].empty()) {
a3e7b735 2945 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(::arg()["lua-dns-script"]));
674cf0f6
BH
2946 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
2947 }
674cf0f6
BH
2948 }
2949 catch(std::exception &e) {
2950 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 2951 _exit(99);
674cf0f6 2952 }
3ddb9247 2953
77499b05 2954 t_traceRegex = new shared_ptr<Regex>();
f8f243b0 2955 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 2956 if(ringsize) {
60c8afa8 2957 t_remotes = new addrringbuf_t();
f8f243b0 2958 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 2959 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 2960 else
3ddb9247 2961 t_remotes->set_capacity(ringsize);
60c8afa8 2962 t_servfailremotes = new addrringbuf_t();
3ddb9247 2963 t_servfailremotes->set_capacity(ringsize);
60c8afa8 2964 t_largeanswerremotes = new addrringbuf_t();
3ddb9247 2965 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 2966
c5c066bf 2967 t_queryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2968 t_queryring->set_capacity(ringsize);
c5c066bf 2969 t_servfailqueryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
3ddb9247 2970 t_servfailqueryring->set_capacity(ringsize);
92011b8f 2971 }
3ddb9247 2972
bb4bdbaf 2973 MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
3ddb9247 2974
bb4bdbaf
BH
2975 PacketID pident;
2976
2977 t_fdm=getMultiplexer();
f3d1d67b 2978 if(!t_id) {
d07bf7ff 2979 if(::arg().mustDo("webserver")) {
30a1aa92 2980 L<<Logger::Warning << "Enabling web server" << endl;
8989097d 2981 try {
1ce57618 2982 new RecursorWebServer(t_fdm);
8989097d
CH
2983 }
2984 catch(PDNSException &e) {
2985 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
2986 exit(99);
2987 }
f3d1d67b 2988 }
83252304 2989 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 2990 }
83252304 2991
49a699c4 2992 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
83252304 2993
810ff705
RG
2994 if(g_useOneSocketPerThread) {
2995 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
2996 for(deferredAdd_t::const_iterator i = deferredAdds[threadId].begin(); i != deferredAdds[threadId].end(); ++i) {
2997 t_fdm->addReadFD(i->first, i->second);
2998 }
2999 }
3000 }
3001 else {
3002 if(!g_weDistributeQueries || !t_id) { // if we distribute queries, only t_id = 0 listens
3003 for(deferredAdd_t::const_iterator i = deferredAdds[0].begin(); i != deferredAdds[0].end(); ++i) {
3004 t_fdm->addReadFD(i->first, i->second);
3005 }
3006 }
3007 }
3ddb9247 3008
674cf0f6 3009 if(!t_id) {
674cf0f6
BH
3010 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3011 }
1bc3c142 3012
f7c1d4e3 3013 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 3014
f7c1d4e3 3015 bool listenOnTCP(true);
49a699c4 3016
2c78bd57 3017 time_t last_carbon=0;
3018 time_t carbonInterval=::arg().asNum("carbon-interval");
ac0995bb 3019 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 3020 for(;;) {
ac0e821b 3021 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 3022
3427fa8a
BH
3023 if(!(counter%500)) {
3024 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
3025 }
3026
d2392145 3027 if(!(counter%55)) {
d8f6d49f 3028 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 3029 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 3030
f7c1d4e3 3031 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 3032 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 3033 if(g_logCommonErrors)
cd989c87 3034 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
4957a608 3035 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
3036 }
3037 }
3ddb9247 3038
f7c1d4e3
BH
3039 counter++;
3040
3427fa8a 3041 if(!t_id && statsWanted) {
f7c1d4e3
BH
3042 doStats();
3043 }
3044
3045 Utility::gettimeofday(&g_now, 0);
2c78bd57 3046
3047 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
3048 MT->makeThread(doCarbonDump, 0);
3049 last_carbon = g_now.tv_sec;
3050 }
3051
bb4bdbaf 3052 t_fdm->run(&g_now);
3ea54bf0 3053 // 'run' updates g_now for us
f7c1d4e3 3054
b8ef5c5c 3055 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 3056 if(listenOnTCP) {
3057 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3058 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3059 t_fdm->removeReadFD(*i);
3060 listenOnTCP=false;
3061 }
f7c1d4e3 3062 }
5c889cf5 3063 else {
3064 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3065 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3066 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3067 listenOnTCP=true;
3068 }
f7c1d4e3
BH
3069 }
3070 }
3071 }
3072}
3f81d239 3073catch(PDNSException &ae) {
bb4bdbaf
BH
3074 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3075 return 0;
3076}
3077catch(std::exception &e) {
3078 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3079 return 0;
3080}
3081catch(...) {
3082 L<<Logger::Error<<"any other exception in main: "<<endl;
3083 return 0;
3084}
3085
51e2144e 3086
3ddb9247 3087int main(int argc, char **argv)
288f4aa9 3088{
dbd23fc2
BH
3089 g_argc = argc;
3090 g_argv = argv;
5e3de507 3091 g_stats.startupTime=time(0);
3e135495 3092 versionSetProduct(ProductRecursor);
8a63d3ce 3093 reportBasicTypes();
0007c2e5 3094 reportOtherTypes();
ea634573 3095
22030c37 3096 int ret = EXIT_SUCCESS;
caa6eefa 3097
288f4aa9 3098 try {
f888311c 3099 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 3100 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 3101 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 3102 ::arg().set("local-port","port to listen on")="53";
32252594 3103 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 3104 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 3105 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 3106 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 3107 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
d3f809bf 3108 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 3109 ::arg().setSwitch("write-pid","Write a PID file")="yes";
34162f8f 3110 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="4";
b6cfa948 3111 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
22e0810c 3112 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
3113 ::arg().set("chroot","switch to chroot jail")="";
3114 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3115 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 3116 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 3117 ::arg().set("threads", "Launch this number of threads")="2";
adabfcb9 3118 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 3119 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 3120 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976
CH
3121 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3122 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3123 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3124 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
3125 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3126 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3127 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
69e7f117 3128 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
cc08b5a9 3129 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 3130 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 3131 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
c038218b 3132 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 3133 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 3134 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
3135 ::arg().set("socket-owner","Owner of socket")="";
3136 ::arg().set("socket-group","Group of socket")="";
3137 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 3138
f0f3f0b0 3139 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
3140 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3141 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 3142 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 3143 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 3144 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 3145 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 3146 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 3147 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 3148 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 3149 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 3150 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 3151 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 3152 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 3153 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 3154 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
7f7b8d55 3155 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
92011b8f 3156 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 3157 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 3158 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 3159 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 3160 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 3161 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 3162 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 3163 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 3164 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 3165 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 3166 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 3167 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 3168
5605c067 3169 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
3170 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3171 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 3172 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 3173 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 3174 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 3175 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 3176 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 3177 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 3178 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 3179 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3180 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3f975863 3181 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
b40562da 3182 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="";
e498dac1 3183 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 3184 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 3185 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 3186 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
a09a8ce0 3187 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 3188 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 3189 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 3190 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 3191 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 3192 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
a09a8ce0 3193
68e6df3c 3194 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 3195 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 3196
3197 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 3198
d705aad9
RG
3199 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3200 ::arg().setSwitch("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3201
0735b17e
RG
3202 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3203
2e3d8a19 3204 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 3205 ::arg().setCmd("version","Print version string");
d5141417 3206 ::arg().setCmd("config","Output blank configuration");
f27e6356 3207 L.toConsole(Logger::Info);
2e3d8a19 3208 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 3209
2d733c0f
CH
3210 string configname=::arg()["config-dir"]+"/recursor.conf";
3211 if(::arg()["config-name"]!="") {
3212 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 3213 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
3214 }
3215 cleanSlashes(configname);
5124de27 3216
577cf284
BH
3217 if(::arg().mustDo("config")) {
3218 cout<<::arg().configstring()<<endl;
3219 exit(0);
3220 }
3221
3ddb9247 3222 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
3223 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3224
2e3d8a19 3225 ::arg().parse(argc,argv);
c836dc19 3226
f0f3f0b0
PL
3227 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3228 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3229 exit(EXIT_FAILURE);
3230 }
3231
3232 if (::arg()["socket-dir"].empty()) {
3233 if (::arg()["chroot"].empty())
3234 ::arg().set("socket-dir") = LOCALSTATEDIR;
3235 else
3236 ::arg().set("socket-dir") = "/";
3237 }
3238
2e3d8a19 3239 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 3240
61d74169 3241 if(::arg().asNum("threads")==1)
3242 ::arg().set("pdns-distributes-queries")="no";
3243
2e3d8a19 3244 if(::arg().mustDo("help")) {
ff5ba4f9
WA
3245 cout<<"syntax:"<<endl<<endl;
3246 cout<<::arg().helpstring(::arg()["help"])<<endl;
3247 exit(0);
b636533b 3248 }
5e3de507 3249 if(::arg().mustDo("version")) {
ba1a571d 3250 showProductVersion();
3613a51c 3251 showBuildConfiguration();
67076869 3252 exit(0);
5e3de507 3253 }
b636533b 3254
34162f8f 3255 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 3256
34162f8f
CH
3257 if (logUrgency < Logger::Error)
3258 logUrgency = Logger::Error;
f48d7b65 3259 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3260 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3261 }
34162f8f
CH
3262 L.setLoglevel(logUrgency);
3263 L.toConsole(logUrgency);
3264
f7c1d4e3 3265 serviceMain(argc, argv);
288f4aa9 3266 }
3f81d239 3267 catch(PDNSException &ae) {
c836dc19 3268 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 3269 ret=EXIT_FAILURE;
288f4aa9 3270 }
fdbf35ac 3271 catch(std::exception &e) {
c836dc19 3272 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 3273 ret=EXIT_FAILURE;
288f4aa9
BH
3274 }
3275 catch(...) {
c836dc19 3276 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 3277 ret=EXIT_FAILURE;
288f4aa9 3278 }
3ddb9247 3279
22030c37 3280 return ret;
288f4aa9 3281}
7836f7b4
PL
3282
3283int getRootNS(void) {
3284 SyncRes sr(g_now);
3285 sr.setDoEDNS0(true);
3286 sr.setNoCache();
3287 sr.d_doDNSSEC = (g_dnssecmode != DNSSECMode::Off);
3288
3289 vector<DNSRecord> ret;
3290 int res=-1;
3291 try {
3292 res=sr.beginResolve(g_rootdnsname, QType(QType::NS), 1, ret);
3293 if (g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate) {
4898a348
RG
3294 ResolveContext ctx;
3295 auto state = validateRecords(ctx, ret);
7836f7b4
PL
3296 if (state == Bogus)
3297 throw PDNSException("Got Bogus validation result for .|NS");
3298 }
3299 return res;
3300 }
3301 catch(PDNSException& e)
3302 {
3303 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
3304 }
3305
3306 catch(std::exception& e)
3307 {
3308 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
3309 }
3310
3311 catch(...)
3312 {
3313 L<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
3314 }
3315 if(!res) {
3316 L<<Logger::Notice<<"Refreshed . records"<<endl;
3317 }
3318 else
3319 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
3320 return res;
3321}