]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
recursor: Add NSID support
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9 1/*
6edbf68a
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
3e61e7f7 25
76473b92
KM
26#include <netdb.h>
27#include <sys/stat.h>
28#include <unistd.h>
f097141b 29#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
b47026fd 30#include <boost/container/flat_set.hpp>
f097141b 31#endif
2470b36e 32#include "ws-recursor.hh"
49a699c4 33#include <pthread.h>
3ea54bf0 34#include "recpacketcache.hh"
3ddb9247 35#include "utility.hh"
51e2144e 36#include "dns_random.hh"
d1b28475
KM
37#ifdef HAVE_LIBSODIUM
38#include <sodium.h>
39#endif
3afde9b2 40#include "opensslsigners.hh"
288f4aa9
BH
41#include <iostream>
42#include <errno.h>
81859ba5 43#include <boost/static_assert.hpp>
288f4aa9
BH
44#include <map>
45#include <set>
97bb160b 46#include "recursor_cache.hh"
38c9ceaa 47#include "cachecleaner.hh"
288f4aa9 48#include <stdio.h>
c75a6a9e 49#include <signal.h>
288f4aa9 50#include <stdlib.h>
bb4bdbaf 51#include "misc.hh"
288f4aa9
BH
52#include "mtasker.hh"
53#include <utility>
288f4aa9
BH
54#include "arguments.hh"
55#include "syncres.hh"
88def049
BH
56#include <fcntl.h>
57#include <fstream>
3e61e7f7 58#include "sortlist.hh"
5c633640
BH
59#include "sstuff.hh"
60#include <boost/tuple/tuple.hpp>
61#include <boost/tuple/tuple_comparison.hpp>
72df400f 62#include <boost/shared_array.hpp>
7f1fa77d 63#include <boost/function.hpp>
5605c067 64#include <boost/algorithm/string.hpp>
8f7473d7 65#ifdef MALLOC_TRACE
66#include "malloctrace.hh"
67#endif
40a3dd64 68#include <netinet/tcp.h>
ea634573
BH
69#include "dnsparser.hh"
70#include "dnswriter.hh"
71#include "dnsrecords.hh"
f814d7c8 72#include "zoneparser-tng.hh"
1d5b3ce6 73#include "rec_channel.hh"
aaacf7f2 74#include "logger.hh"
c8ddb7c2 75#include "iputils.hh"
09e6702a 76#include "mplexer.hh"
c038218b 77#include "config.h"
808c5ef7 78#include "lua-recursor4.hh"
ba1a571d 79#include "version.hh"
79332bff 80#include "responsestats.hh"
d67620e4 81#include "secpoll-recursor.hh"
c5c066bf 82#include "dnsname.hh"
644dd1da 83#include "filterpo.hh"
84#include "rpzloader.hh"
b3f0ed10 85#include "validate-recursor.hh"
f3c18728 86#include "rec-lua-conf.hh"
5c3b5e7f 87#include "ednsoptions.hh"
85c7ca75 88#include "gettime.hh"
f3c18728 89
d9d3f9c1 90#include "rec-protobuf.hh"
d705aad9 91#include "rec-snmp.hh"
aa7929a3 92
6b6720de
PL
93#ifdef HAVE_SYSTEMD
94#include <systemd/sd-daemon.h>
95#endif
96
d187038c
RG
97#include "namespaces.hh"
98
5cc8371b
RG
99#include "xpf.hh"
100
d187038c
RG
101typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
102
f26bf547 103static thread_local std::shared_ptr<RecursorLua4> t_pdl;
d77abca1 104static thread_local int t_id;
f26bf547
RG
105static thread_local std::shared_ptr<Regex> t_traceRegex;
106static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
63341e8d
RG
107#ifdef HAVE_PROTOBUF
108static thread_local std::shared_ptr<RemoteLogger> t_protobufServer{nullptr};
109static thread_local std::shared_ptr<RemoteLogger> t_outgoingProtobufServer{nullptr};
110#endif /* HAVE_PROTOBUF */
f26bf547
RG
111
112thread_local std::unique_ptr<MT_t> MT; // the big MTasker
113thread_local std::unique_ptr<MemRecursorCache> t_RC;
114thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
3337c2f7 115thread_local FDMultiplexer* t_fdm{nullptr};
f26bf547
RG
116thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
117thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
118thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
aa7929a3 119#ifdef HAVE_PROTOBUF
f26bf547 120thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
aa7929a3 121#endif
d187038c 122__thread struct timeval g_now; // timestamp, updated (too) frequently
d7dae798
BH
123
// for communicating with our threads
// Each member holds one file descriptor; the names pair a write end with a
// read end for each direction (presumably the two ends of a pipe - confirm
// where g_pipes is filled in).
struct ThreadPipeSet
{
  int writeToThread;
  int readToThread;

  int writeFromThread;
  int readFromThread;

  int writeQueriesToThread; // this one is non-blocking
  int readQueriesToThread;
};
810ff705 134
387b9ca6 135/* the TID of the thread handling the web server, carbon, statistics and the control channel */
faf580f5 136static const int s_handlerThreadID = -1;
387b9ca6
RG
137/* when pdns-distributes-queries is set, the TID of the thread handling, hashing and distributing new queries
138 to the other threads */
faf580f5
RG
139static const int s_distributorThreadID = 0;
140
d187038c
RG
141typedef vector<int> tcpListenSockets_t;
142typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
143typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
3ea54bf0 144
d187038c
RG
145static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
146static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
147static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
148static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
810ff705 149static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
d187038c
RG
150static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
151static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
152static AtomicCounter counter;
9065eb05 153static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
f26bf547 154static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
5cc8371b 155static NetmaskGroup g_XPFAcl;
d187038c 156static size_t g_tcpMaxQueriesPerConn;
a5886e6a 157static size_t s_maxUDPQueriesPerRound;
d187038c
RG
158static uint64_t g_latencyStatSize;
159static uint32_t g_disthashseed;
160static unsigned int g_maxTCPPerClient;
161static unsigned int g_networkTimeoutMsec;
162static unsigned int g_maxMThreads;
163static unsigned int g_numWorkerThreads;
164static int g_tcpTimeout;
165static uint16_t g_udpTruncationThreshold;
59cb4a79 166static uint16_t g_xpfRRCode{0};
d187038c
RG
167static std::atomic<bool> statsWanted;
168static std::atomic<bool> g_quiet;
169static bool g_logCommonErrors;
170static bool g_anyToTcp;
d187038c 171static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
810ff705
RG
172static bool g_reusePort{false};
173static bool g_useOneSocketPerThread;
00b8cadc 174static bool g_gettagNeedsEDNSOptions{false};
0ec489bf 175static time_t g_statisticsInterval;
9065eb05 176static bool g_useIncomingECS;
a6f7f5fe 177std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
f097141b 178#ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
bf6f28ca 179static boost::container::flat_set<uint16_t> s_avoidUdpSourcePorts;
f097141b
CHB
180#else
181static std::set<uint16_t> s_avoidUdpSourcePorts;
182#endif
bf6f28ca
CHB
183static uint16_t s_minUdpSourcePort;
184static uint16_t s_maxUdpSourcePort;
49a699c4 185
d187038c
RG
186RecursorControlChannel s_rcc; // only active in thread 0
187RecursorStats g_stats;
2d733c0f 188string s_programname="pdns_recursor";
d187038c 189string s_pidfname;
c1c29961 190bool g_lowercaseOutgoing;
d187038c
RG
191unsigned int g_numThreads;
192uint16_t g_outgoingEDNSBufsize;
98d36505 193bool g_logRPZChanges{false};
c3828c03 194
12cd44ee 195#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
2fe3354d 196#define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
12cd44ee 197// Bad Nets taken from both:
3ddb9247 198// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
12cd44ee 199// and
200// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
201// where such a network may not be considered a valid destination
202#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
203#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
49a699c4 204
d7dae798 205//! used to send information to a newborn mthread
ea634573 206struct DNSComboWriter {
5164bac3 207 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now): d_mdp(true, data, len), d_now(now)
ea634573 208 {}
5cc8371b 209
5164bac3
RG
210 DNSComboWriter(const std::string& query, const struct timeval& now, std::vector<std::string>&& policyTags, LuaContext::LuaObject&& data): d_mdp(true, query.c_str(), query.size()), d_now(now), d_policyTags(std::move(policyTags)), d_data(std::move(data))
211 {
212 }
213
5cc8371b
RG
214 void setRemote(const ComboAddress& sa)
215 {
216 d_remote=sa;
217 }
218
219 void setSource(const ComboAddress& sa)
ea634573 220 {
5cc8371b 221 d_source=sa;
ea634573
BH
222 }
223
b71b60ee 224 void setLocal(const ComboAddress& sa)
225 {
226 d_local=sa;
227 }
228
5cc8371b
RG
229 void setDestination(const ComboAddress& sa)
230 {
231 d_destination=sa;
232 }
b71b60ee 233
ea634573
BH
234 void setSocket(int sock)
235 {
236 d_socket=sock;
237 }
a1754c6a
BH
238
239 string getRemote() const
240 {
5cc8371b
RG
241 if (d_source == d_remote) {
242 return d_source.toStringWithPort();
243 }
244 return d_source.toStringWithPort() + " (proxied by " + d_remote.toStringWithPort() + ")";
a1754c6a
BH
245 }
246
5cc8371b 247 MOADNSParser d_mdp;
c9e9e5e0 248 struct timeval d_now;
5cc8371b
RG
249 /* Remote client, might differ from d_source
250 in case of XPF, in which case d_source holds
251 the IP of the client and d_remote of the proxy
252 */
253 ComboAddress d_remote;
254 ComboAddress d_source;
255 /* Destination address, might differ from
256 d_destination in case of XPF, in which case
257 d_destination holds the IP of the proxy and
258 d_local holds our own. */
259 ComboAddress d_local;
260 ComboAddress d_destination;
aa7929a3
RG
261#ifdef HAVE_PROTOBUF
262 boost::uuids::uuid d_uuid;
67e31ebe 263 string d_requestorId;
590388d2 264 string d_deviceId;
aa7929a3 265#endif
5164bac3
RG
266 std::vector<std::string> d_policyTags;
267 LuaContext::LuaObject d_data;
b40562da 268 EDNSSubnetOpts d_ednssubnet;
5164bac3 269 shared_ptr<TCPConnection> d_tcpConnection;
ea634573 270 int d_socket;
b673817a 271 unsigned int d_tag{0};
e9f63d47 272 uint32_t d_qhash{0};
70fb28d9
RG
273 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
274 bool d_variable{false};
5164bac3
RG
275 bool d_ecsFound{false};
276 bool d_ecsParsed{false};
277 bool d_tcp;
ea634573
BH
278};
279
06857845
RG
280MT_t* getMT()
281{
282 return MT ? MT.get() : nullptr;
283}
ea634573 284
288f4aa9
BH
285ArgvMap &arg()
286{
287 static ArgvMap theArg;
288 return theArg;
289}
4ef015cd 290
b4015453
RG
291unsigned int getRecursorThreadId()
292{
d77abca1 293 return static_cast<unsigned int>(t_id);
b4015453 294}
09e6702a 295
30ee601a
RG
296int getMTaskerTID()
297{
298 return MT->getTid();
299}
300
d187038c 301static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 302
50c81227 303// -1 is error, 0 is timeout, 1 is success
3ddb9247 304int asendtcp(const string& data, Socket* sock)
5c633640
BH
305{
306 PacketID pident;
307 pident.sock=sock;
308 pident.outMSG=data;
3ddb9247 309
bb4bdbaf 310 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 311 string packet;
5c633640 312
5b0ddd18 313 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
23db0a09 314
9170fbaf 315 if(!ret || ret==-1) { // timeout
bb4bdbaf 316 t_fdm->removeWriteFD(sock->getHandle());
5c633640 317 }
50c81227
BH
318 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
319 return -1;
320 }
9170fbaf 321 return ret;
5c633640
BH
322}
323
d187038c 324static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
09e6702a 325
9170fbaf 326// -1 is error, 0 is timeout, 1 is success
a683e8bd 327int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
288f4aa9 328{
50c81227 329 data.clear();
5c633640
BH
330 PacketID pident;
331 pident.sock=sock;
332 pident.inNeeded=len;
825fa717 333 pident.inIncompleteOkay=incompleteOkay;
bb4bdbaf 334 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 335
bb4bdbaf 336 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
9170fbaf 337 if(!ret || ret==-1) { // timeout
bb4bdbaf 338 t_fdm->removeReadFD(sock->getHandle());
288f4aa9 339 }
50c81227
BH
340 else if(data.empty()) {// error, EOF or other
341 return -1;
342 }
343
9170fbaf 344 return ret;
288f4aa9
BH
345}
346
d187038c 347static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
4465e941 348{
fba1e944 349 PacketID pident=*any_cast<PacketID>(&var);
4465e941 350 char resp[512];
7c77ce63
RG
351 ComboAddress fromaddr;
352 socklen_t addrlen=sizeof(fromaddr);
353
354 ssize_t ret=recvfrom(fd, resp, sizeof(resp), 0, (sockaddr *)&fromaddr, &addrlen);
355 if (fromaddr != pident.remote) {
e6a9dde5 356 g_log<<Logger::Notice<<"Response received from the wrong remote host ("<<fromaddr.toStringWithPort()<<" instead of "<<pident.remote.toStringWithPort()<<"), discarding"<<endl;
7c77ce63
RG
357
358 }
359
4465e941 360 t_fdm->removeReadFD(fd);
361 if(ret >= 0) {
a683e8bd 362 string data(resp, (size_t) ret);
fba1e944 363 MT->sendEvent(pident, &data);
4465e941 364 }
365 else {
fba1e944 366 string empty;
367 MT->sendEvent(pident, &empty);
368 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
4465e941 369 }
370}
fba1e944 371string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
4465e941 372{
4465e941 373 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
374 s.setNonBlocking();
375 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
376
377 s.bind(local);
378 s.connect(dest);
4465e941 379 s.send(query);
380
381 PacketID pident;
382 pident.sock=&s;
7c77ce63 383 pident.remote=dest;
4465e941 384 pident.type=0;
fba1e944 385 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
4465e941 386
387 string data;
fba1e944 388
4465e941 389 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
fba1e944 390
4465e941 391 if(!ret || ret==-1) { // timeout
4465e941 392 t_fdm->removeReadFD(s.getHandle());
393 }
394 else if(data.empty()) {// error, EOF or other
fba1e944 395 // we could special case this
4465e941 396 return data;
397 }
4465e941 398 return data;
399}
400
d7dae798 401//! pick a random query local address
1652a63e 402ComboAddress getQueryLocalAddress(int family, uint16_t port)
5a38281c 403{
1652a63e 404 ComboAddress ret;
5a38281c 405 if(family==AF_INET) {
3ddb9247 406 if(g_localQueryAddresses4.empty())
1652a63e 407 ret = g_local4;
3ddb9247 408 else
1652a63e
BH
409 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
410 ret.sin4.sin_port = htons(port);
5a38281c
BH
411 }
412 else {
413 if(g_localQueryAddresses6.empty())
1652a63e
BH
414 ret = g_local6;
415 else
416 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
3ddb9247 417
1652a63e 418 ret.sin6.sin6_port = htons(port);
5a38281c 419 }
1652a63e 420 return ret;
5a38281c 421}
4ef015cd 422
d187038c 423static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
09e6702a 424
d187038c 425static void setSocketBuffer(int fd, int optname, uint32_t size)
d7dae798
BH
426{
427 uint32_t psize=0;
428 socklen_t len=sizeof(psize);
3ddb9247 429
d7dae798 430 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
e6a9dde5 431 g_log<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
3ddb9247 432 return;
d7dae798
BH
433 }
434
435 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
e6a9dde5 436 g_log<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
d7dae798
BH
437}
438
439
440static void setSocketReceiveBuffer(int fd, uint32_t size)
441{
442 setSocketBuffer(fd, SO_RCVBUF, size);
443}
444
445static void setSocketSendBuffer(int fd, uint32_t size)
446{
447 setSocketBuffer(fd, SO_SNDBUF, size);
448}
449
450
4ef015cd
BH
451// you can ask this class for a UDP socket to send a query from
452// this socket is not yours, don't even think about deleting it
453// but after you call 'returnSocket' on it, don't assume anything anymore
454class UDPClientSocks
455{
4ef015cd 456 unsigned int d_numsocks;
4ef015cd 457public:
e2642526 458 UDPClientSocks() : d_numsocks(0)
4ef015cd
BH
459 {
460 }
461
996c89cc 462 typedef set<int> socks_t;
4ef015cd
BH
463 socks_t d_socks;
464
2ee280cf 465 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
d8f6d49f 466 int getSocket(const ComboAddress& toaddr, int* fd)
4ef015cd 467 {
d8f6d49f
BH
468 *fd=makeClientSocket(toaddr.sin4.sin_family);
469 if(*fd < 0) // temporary error - receive exception otherwise
2ee280cf 470 return -2;
d8f6d49f
BH
471
472 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
473 int err = errno;
41ff43f8 474 // returnSocket(*fd);
a7b68ae7
RG
475 try {
476 closesocket(*fd);
477 }
478 catch(const PDNSException& e) {
e6a9dde5 479 g_log<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
a7b68ae7
RG
480 }
481
d8f6d49f 482 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
4957a608 483 return -2;
998a4334 484 return -1;
d8f6d49f 485 }
998a4334 486
d8f6d49f 487 d_socks.insert(*fd);
998a4334 488 d_numsocks++;
d8f6d49f 489 return 0;
4ef015cd
BH
490 }
491
095c3045
BH
492 void returnSocket(int fd)
493 {
494 socks_t::iterator i=d_socks.find(fd);
34801ab1 495 if(i==d_socks.end()) {
335da0ba 496 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
34801ab1 497 }
bb4bdbaf 498 returnSocketLocked(i);
095c3045
BH
499 }
500
4ef015cd 501 // return a socket to the pool, or simply erase it
bb4bdbaf 502 void returnSocketLocked(socks_t::iterator& i)
4ef015cd 503 {
600fc20b 504 if(i==d_socks.end()) {
3f81d239 505 throw PDNSException("Trying to return a socket not in the pool");
600fc20b 506 }
80baf329 507 try {
bb4bdbaf 508 t_fdm->removeReadFD(*i);
80baf329
BH
509 }
510 catch(FDMultiplexerException& e) {
bb4bdbaf 511 // we sometimes return a socket that has not yet been assigned to t_fdm
80baf329 512 }
a7b68ae7
RG
513 try {
514 closesocket(*i);
515 }
516 catch(const PDNSException& e) {
e6a9dde5 517 g_log<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
a7b68ae7 518 }
3ddb9247 519
998a4334
BH
520 d_socks.erase(i++);
521 --d_numsocks;
4ef015cd 522 }
d8f6d49f
BH
523
524 // returns -1 for errors which might go away, throws for ones that won't
bb4bdbaf 525 static int makeClientSocket(int family)
d8f6d49f 526 {
a683e8bd 527 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
42c235e5 528
d8f6d49f
BH
529 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
530 return ret;
3ddb9247
PD
531
532 if(ret<0)
335da0ba 533 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
36855b53 534
7eb73ffa 535 // setCloseOnExec(ret); // we're not going to exec
5a38281c 536
d8f6d49f 537 int tries=10;
3aa91c3e 538 ComboAddress sin;
d8f6d49f 539 while(--tries) {
1652a63e 540 uint16_t port;
3ddb9247 541
d8f6d49f 542 if(tries==1) // fall back to kernel 'random'
4957a608 543 port = 0;
bf6f28ca
CHB
544 else {
545 do {
546 port = s_minUdpSourcePort + dns_random(s_maxUdpSourcePort - s_minUdpSourcePort + 1);
547 }
548 while (s_avoidUdpSourcePorts.count(port));
549 }
5a38281c 550
3aa91c3e 551 sin=getQueryLocalAddress(family, port); // does htons for us
5a38281c 552
3ddb9247 553 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
4957a608 554 break;
d8f6d49f
BH
555 }
556 if(!tries)
3aa91c3e 557 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
3ddb9247 558
3897b9e1 559 setNonBlocking(ret);
d8f6d49f
BH
560 return ret;
561 }
49a699c4
BH
562};
563
f26bf547 564static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
4ef015cd 565
288f4aa9 566/* these two functions are used by LWRes */
34801ab1 567// -2 is OS error, -1 is error that depends on the remote, > 0 is success
a683e8bd 568int asendto(const char *data, size_t len, int flags,
3ddb9247 569 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
288f4aa9 570{
34801ab1
BH
571
572 PacketID pident;
787e5eab
BH
573 pident.domain = domain;
574 pident.remote = toaddr;
575 pident.type = qtype;
34801ab1
BH
576
577 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
578 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
579
580 for(; chain.first != chain.second; chain.first++) {
581 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
e27e91a8 582 /*
4665c31e
BH
583 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
584 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
4957a608 585 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
e27e91a8 586 */
34801ab1
BH
587 chain.first->key.chain.insert(id); // we can chain
588 *fd=-1; // gets used in waitEvent / sendEvent later on
589 return 1;
590 }
591 }
592
49a699c4 593 int ret=t_udpclientsocks->getSocket(toaddr, fd);
d8f6d49f
BH
594 if(ret < 0)
595 return ret;
34801ab1 596
998a4334
BH
597 pident.fd=*fd;
598 pident.id=id;
3ddb9247 599
bb4bdbaf
BH
600 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
601 ret = send(*fd, data, len, 0);
602
5b0ddd18 603 int tmp = errno;
bb4bdbaf 604
7302ed0a 605 if(ret < 0)
49a699c4 606 t_udpclientsocks->returnSocket(*fd);
bb4bdbaf 607
5b0ddd18 608 errno = tmp; // this is for logging purposes only
7302ed0a 609 return ret;
288f4aa9
BH
610}
611
9170fbaf 612// -1 is error, 0 is timeout, 1 is success
a683e8bd 613int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
c5c066bf 614 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
288f4aa9 615{
0d5f0a9f 616 static optional<unsigned int> nearMissLimit;
3ddb9247 617 if(!nearMissLimit)
0d5f0a9f
BH
618 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
619
288f4aa9 620 PacketID pident;
4ef015cd 621 pident.fd=fd;
288f4aa9 622 pident.id=id;
0d5f0a9f 623 pident.domain=domain;
787e5eab 624 pident.type = qtype;
996c89cc 625 pident.remote=fromaddr;
b636533b 626
288f4aa9 627 string packet;
5b0ddd18 628 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
34801ab1 629
9170fbaf 630 if(ret > 0) {
996c89cc 631 if(packet.empty()) // means "error"
3ddb9247 632 return -1;
998a4334 633
a683e8bd 634 *d_len=packet.size();
9170fbaf 635 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 636 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
e6a9dde5 637 g_log<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
0d5f0a9f 638 g_stats.spoofCount++;
35ce8576
BH
639 return -1;
640 }
288f4aa9 641 }
09e6702a 642 else {
34801ab1 643 if(fd >= 0)
49a699c4 644 t_udpclientsocks->returnSocket(fd);
09e6702a 645 }
9170fbaf 646 return ret;
288f4aa9
BH
647}
648
88def049
BH
649static void writePid(void)
650{
191f2e47 651 if(!::arg().mustDo("write-pid"))
652 return;
18e7758c 653 ofstream of(s_pidfname.c_str(), std::ios_base::app);
88def049 654 if(of)
705f31ae 655 of<< Utility::getpid() <<endl;
88def049 656 else
e6a9dde5 657 g_log<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
658}
659
cd989c87 660TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
3ddb9247
PD
661{
662 ++s_currentConnections;
cd989c87 663 (*t_tcpClientCounts)[d_remote]++;
0e408828 664}
cd989c87
BH
665
666TCPConnection::~TCPConnection()
0e408828 667{
a7b68ae7
RG
668 try {
669 if(closesocket(d_fd) < 0)
e6a9dde5 670 g_log<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
a7b68ae7
RG
671 }
672 catch(const PDNSException& e) {
e6a9dde5 673 g_log<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
a7b68ae7
RG
674 }
675
3ddb9247 676 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
cd989c87 677 t_tcpClientCounts->erase(d_remote);
1bc9e6bd 678 --s_currentConnections;
0e408828 679}
0e9d9ce2 680
3ddb9247 681AtomicCounter TCPConnection::s_currentConnections;
d187038c
RG
682
683static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
6dcd28c3 684
92011b8f 685// the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
d187038c 686static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
2cc13433 687{
92011b8f 688 if(packetsize > 1000 && t_largeanswerremotes)
689 t_largeanswerremotes->push_back(remote);
2cc13433
BH
690 switch(res) {
691 case RCode::ServFail:
92011b8f 692 if(t_servfailremotes) {
693 t_servfailremotes->push_back(remote);
5af86fdc 694 if(query && t_servfailqueryring) // packet cache
92011b8f 695 t_servfailqueryring->push_back(make_pair(*query, qtype));
696 }
2cc13433
BH
697 g_stats.servFails++;
698 break;
699 case RCode::NXDomain:
700 g_stats.nxDomains++;
701 break;
702 case RCode::NoError:
703 g_stats.noErrors++;
704 break;
705 }
706}
707
5164bac3 708static string makeLoginfo(const DNSComboWriter* dc)
a903b39c 709try
710{
5cc8371b 711 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->getRemote())+")";
a903b39c 712}
713catch(...)
714{
715 return "Exception making error message for exception";
716}
717
aa7929a3 718#ifdef HAVE_PROTOBUF
590388d2 719static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
aa7929a3 720{
e1c8a4bb
RG
721 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
722 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
723 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
a94bc5d7 724 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
67e31ebe 725 message.setRequestorId(requestorId);
590388d2 726 message.setDeviceId(deviceId);
02b47f43 727
02b47f43 728 if (!policyTags.empty()) {
d9d3f9c1 729 message.setPolicyTags(policyTags);
02b47f43 730 }
aa7929a3 731
d9d3f9c1 732// cerr <<message.toDebugString()<<endl;
aa7929a3 733 std::string str;
d9d3f9c1 734 message.serialize(str);
aa7929a3 735 logger->queueData(str);
aa7929a3
RG
736}
737
d9d3f9c1 738static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
aa7929a3 739{
d9d3f9c1 740// cerr <<message.toDebugString()<<endl;
aa7929a3 741 std::string str;
d9d3f9c1 742 message.serialize(str);
aa7929a3 743 logger->queueData(str);
aa7929a3
RG
744}
745#endif
746
53508135
PL
747/**
748 * Chases the CNAME provided by the PolicyCustom RPZ policy.
749 *
750 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
751 * @param qtype: The QType of the original query
752 * @param sr: A SyncRes
753 * @param res: An integer that will contain the RCODE of the lookup we do
754 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
755 */
d187038c 756static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
53508135
PL
757{
758 if (spoofed.d_type == QType::CNAME) {
30ee601a
RG
759 bool oldWantsRPZ = sr.getWantsRPZ();
760 sr.setWantsRPZ(false);
53508135
PL
761 vector<DNSRecord> ans;
762 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
763 for (const auto& rec : ans) {
764 if(rec.d_place == DNSResourceRecord::ANSWER) {
765 ret.push_back(rec);
766 }
767 }
768 // Reset the RPZ state of the SyncRes
30ee601a 769 sr.setWantsRPZ(oldWantsRPZ);
53508135
PL
770 }
771}
772
70fb28d9 773static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
97c6d7e5 774{
70fb28d9 775 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
97c6d7e5
RG
776
777 if(rec.d_type != QType::OPT) // their TTL ain't real
778 minTTL = min(minTTL, rec.d_ttl);
779
780 rec.d_content->toPacket(pw);
781 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
782 pw.rollback();
783 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
784 pw.getHeader()->tc=1;
785 pw.truncate();
786 }
787 return false;
788 }
789
790 return true;
791}
792
63341e8d
RG
793#ifdef HAVE_PROTOBUF
794static std::shared_ptr<RemoteLogger> startProtobufServer(const ProtobufExportConfig& config, uint64_t generation)
795{
796 std::shared_ptr<RemoteLogger> result = nullptr;
797 try {
798 result = std::make_shared<RemoteLogger>(config.server, config.timeout, config.maxQueuedEntries, config.reconnectWaitTime, config.asyncConnect);
799 result->setGeneration(generation);
800 }
801 catch(const std::exception& e) {
802 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<config.server<<": "<<e.what()<<endl;
803 }
804 catch(const PDNSException& e) {
805 g_log<<Logger::Error<<"Error while starting protobuf logger to '"<<config.server<<": "<<e.reason<<endl;
806 }
807
808 return result;
809}
810
811static bool checkProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
812{
813 if (!luaconfsLocal->protobufExportConfig.enabled) {
814 if (t_protobufServer != nullptr) {
815 t_protobufServer->stop();
816 t_protobufServer = nullptr;
817 }
818
819 return false;
820 }
821
822 /* if the server was not running, or if it was running according to a
823 previous configuration */
824 if (t_protobufServer == nullptr ||
825 t_protobufServer->getGeneration() < luaconfsLocal->generation) {
826
827 if (t_protobufServer) {
828 t_protobufServer->stop();
829 }
830
831 t_protobufServer = startProtobufServer(luaconfsLocal->protobufExportConfig, luaconfsLocal->generation);
832 }
833
834 return true;
835}
836
837static bool checkOutgoingProtobufExport(LocalStateHolder<LuaConfigItems>& luaconfsLocal)
838{
839 if (!luaconfsLocal->outgoingProtobufExportConfig.enabled) {
840 if (t_outgoingProtobufServer != nullptr) {
841 t_outgoingProtobufServer->stop();
842 t_outgoingProtobufServer = nullptr;
843 }
844
845 return false;
846 }
847
848 /* if the server was not running, or if it was running according to a
849 previous configuration */
850 if (t_outgoingProtobufServer == nullptr ||
851 t_outgoingProtobufServer->getGeneration() < luaconfsLocal->generation) {
852
853 if (t_outgoingProtobufServer) {
854 t_outgoingProtobufServer->stop();
855 }
856
857 t_outgoingProtobufServer = startProtobufServer(luaconfsLocal->outgoingProtobufExportConfig, luaconfsLocal->generation);
858 }
859
860 return true;
861}
862#endif /* HAVE_PROTOBUF */
863
d187038c 864static void startDoResolve(void *p)
288f4aa9 865{
7b1469bb 866 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 867 try {
5af86fdc
RG
868 if (t_queryring)
869 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
92011b8f 870
32015748 871 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
7f7b8d55 872 EDNSOpts edo;
5164bac3 873 std::vector<pair<uint16_t, string> > ednsOpts;
8e079f3a 874 bool haveEDNS=false;
1f691b94 875 bool wantsNSID = false;
8e079f3a 876 if(getEDNSOpts(dc->d_mdp, &edo)) {
32015748
RG
877 if(!dc->d_tcp) {
878 /* rfc6891 6.2.3:
879 "Values lower than 512 MUST be treated as equal to 512."
880 */
881 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
882 }
5164bac3 883 ednsOpts = edo.d_options;
8e079f3a 884 haveEDNS=true;
b40562da 885
1f691b94
PL
886 for (const auto& o : edo.d_options) {
887 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
888 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
889 } else if (o.first == EDNSOptionCode::NSID) {
890 wantsNSID = true;
b40562da
RG
891 }
892 }
10321a98 893 }
b40562da
RG
894 /* perhaps there was no EDNS or no ECS but by now we looked */
895 dc->d_ecsParsed = true;
e325f20c 896 vector<DNSRecord> ret;
ea634573 897 vector<uint8_t> packet;
b23b8614 898
ad42489c 899 auto luaconfsLocal = g_luaconfs.getLocal();
b8470add
PL
900 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
901 bool wantsRPZ(true);
1fbc6dc5 902 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
aa7929a3 903#ifdef HAVE_PROTOBUF
63341e8d 904 if (checkProtobufExport(luaconfsLocal)) {
5cc8371b 905 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 906 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
907 pbMessage = RecProtoBufMessage(RecProtoBufMessage::Response);
908 pbMessage->update(dc->d_uuid, &requestor, &dc->d_destination, dc->d_tcp, dc->d_mdp.d_header.id);
909 pbMessage->setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
910 pbMessage->setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
d9d3f9c1
RG
911 }
912#endif /* HAVE_PROTOBUF */
ad42489c 913
3ddb9247 914 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
ea634573
BH
915
916 pw.getHeader()->aa=0;
917 pw.getHeader()->ra=1;
c154c8a4 918 pw.getHeader()->qr=1;
bb4bdbaf 919 pw.getHeader()->tc=0;
ea634573 920 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 921 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
57769f13 922 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
ea634573 923
70fb28d9
RG
924 /* This is the lowest TTL seen in the records of the response,
925 so we can't cache it for longer than this value.
926 If we have a TTL cap, this value can't be larger than the
927 cap no matter what. */
928 uint32_t minTTL = dc->d_ttlCap;
904d3219
PD
929
930 SyncRes sr(dc->d_now);
0c43f455 931
2e921ec6 932 bool DNSSECOK=false;
3457a2a0 933 if(t_pdl) {
f26bf547 934 sr.setLuaEngine(t_pdl);
3457a2a0 935 }
9eec8c98 936 if(g_dnssecmode != DNSSECMode::Off) {
30ee601a 937 sr.setDoDNSSEC(true);
9eec8c98
PL
938
939 // Does the requestor want DNSSEC records?
940 if(edo.d_Z & EDNSOpts::DNSSECOK) {
941 DNSSECOK=true;
942 g_stats.dnssecQueries++;
943 }
944 } else {
945 // Ignore the client-set CD flag
946 pw.getHeader()->cd=0;
5b9853c9 947 }
0c43f455
RG
948 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
949
4898a348 950#ifdef HAVE_PROTOBUF
30ee601a 951 sr.setInitialRequestId(dc->d_uuid);
63341e8d 952 sr.setOutgoingProtobufServer(t_outgoingProtobufServer);
4898a348 953#endif
0c43f455 954
2fe3354d 955 sr.setQuerySource(dc->d_remote, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
57769f13 956
904d3219 957 bool tracedQuery=false; // we could consider letting Lua know about this too
70fb28d9 958 bool variableAnswer = dc->d_variable;
9fc36e90 959 bool shouldNotValidate = false;
904d3219 960
ef3b6cd7
RG
961 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
962 int res = RCode::NoError;
1f1ca368 963 DNSFilterEngine::Policy appliedPolicy;
39ec5d29 964 DNSRecord spoofed;
5cc8371b 965 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
6e505c5e 966 dq.ednsFlags = &edo.d_Z;
5164bac3 967 dq.ednsOptions = &ednsOpts;
6e505c5e
RG
968 dq.tag = dc->d_tag;
969 dq.discardedPolicies = &sr.d_discardedPolicies;
970 dq.policyTags = &dc->d_policyTags;
971 dq.appliedPolicy = &appliedPolicy;
972 dq.currentRecords = &ret;
973 dq.dh = &dc->d_mdp.d_header;
05c74122 974 dq.data = dc->d_data;
67e31ebe
RG
975#ifdef HAVE_PROTOBUF
976 dq.requestorId = dc->d_requestorId;
590388d2 977 dq.deviceId = dc->d_deviceId;
67e31ebe 978#endif
ba21fcfe 979
e661a20b 980 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
56b4d21b
PD
981 pw.getHeader()->tc = 1;
982 res = 0;
983 variableAnswer = true;
e661a20b
PD
984 goto sendit;
985 }
986
f26bf547 987 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
77499b05
BH
988 sr.setLogMode(SyncRes::Store);
989 tracedQuery=true;
990 }
3ddb9247 991
8f7473d7 992
976ec823 993 if(!g_quiet || tracedQuery) {
e6a9dde5 994 g_log<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
976ec823 995 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
b40562da 996 if(!dc->d_ednssubnet.source.empty()) {
e6a9dde5 997 g_log<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
6e986f5e 998 }
e6a9dde5 999 g_log<<endl;
976ec823 1000 }
c75a6a9e 1001
fededf47 1002 sr.setId(MT->getTid());
67828389 1003 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
1004 sr.setCacheOnly();
1005
f26bf547
RG
1006 if (t_pdl) {
1007 t_pdl->prerpz(dq, res);
0a273054
RG
1008 }
1009
db486de5 1010 // Check if the query has a policy attached to it
0a273054 1011 if (wantsRPZ) {
5cc8371b 1012 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_source, sr.d_discardedPolicies);
0a273054 1013 }
644dd1da 1014
54be222b 1015 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
f26bf547 1016 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
b8470add 1017
30ee601a 1018 sr.setWantsRPZ(wantsRPZ);
b8470add
PL
1019 if(wantsRPZ) {
1020 switch(appliedPolicy.d_kind) {
1021 case DNSFilterEngine::PolicyKind::NoAction:
1022 break;
1023 case DNSFilterEngine::PolicyKind::Drop:
1024 g_stats.policyDrops++;
7a25883a 1025 g_stats.policyResults[appliedPolicy.d_kind]++;
b8470add
PL
1026 delete dc;
1027 dc=0;
1028 return;
1029 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1030 g_stats.policyResults[appliedPolicy.d_kind]++;
1031 res=RCode::NXDomain;
1032 goto haveAnswer;
1033 case DNSFilterEngine::PolicyKind::NODATA:
1034 g_stats.policyResults[appliedPolicy.d_kind]++;
1035 res=RCode::NoError;
db486de5 1036 goto haveAnswer;
b8470add
PL
1037 case DNSFilterEngine::PolicyKind::Custom:
1038 g_stats.policyResults[appliedPolicy.d_kind]++;
1039 res=RCode::NoError;
a9e029ee 1040 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1041 ret.push_back(spoofed);
53508135 1042 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1043 goto haveAnswer;
1044 case DNSFilterEngine::PolicyKind::Truncate:
1045 if(!dc->d_tcp) {
1046 g_stats.policyResults[appliedPolicy.d_kind]++;
1047 res=RCode::NoError;
1048 pw.getHeader()->tc=1;
1049 goto haveAnswer;
1050 }
1051 break;
1052 }
db486de5
PL
1053 }
1054
b8470add 1055 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
44971ca0
PD
1056 try {
1057 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
9fc36e90 1058 shouldNotValidate = sr.wasOutOfBand();
44971ca0
PD
1059 }
1060 catch(ImmediateServFailException &e) {
854d44e3 1061 if(g_logCommonErrors)
e6a9dde5 1062 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
44971ca0
PD
1063 res = RCode::ServFail;
1064 }
4485aa35 1065
1921a4c2
RG
1066 dq.validationState = sr.getValidationState();
1067
b8470add
PL
1068 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1069 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1070 appliedPolicy = sr.d_appliedPolicy;
1071 g_stats.policyResults[appliedPolicy.d_kind]++;
1072 switch(appliedPolicy.d_kind) {
1073 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
1074 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1075 case DNSFilterEngine::PolicyKind::Drop:
1076 g_stats.policyDrops++;
1077 delete dc;
1078 dc=0;
1079 return;
1080 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1081 ret.clear();
1082 res=RCode::NXDomain;
1083 goto haveAnswer;
1084
1085 case DNSFilterEngine::PolicyKind::NODATA:
1086 ret.clear();
1087 res=RCode::NoError;
1088 goto haveAnswer;
1089
1090 case DNSFilterEngine::PolicyKind::Truncate:
1091 if(!dc->d_tcp) {
1092 ret.clear();
1093 res=RCode::NoError;
1094 pw.getHeader()->tc=1;
1095 goto haveAnswer;
1096 }
1097 break;
1098
1099 case DNSFilterEngine::PolicyKind::Custom:
1100 ret.clear();
1101 res=RCode::NoError;
a9e029ee 1102 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1103 ret.push_back(spoofed);
53508135 1104 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1105 goto haveAnswer;
1106 }
1107 }
1108
1109 if (wantsRPZ) {
1f1ca368 1110 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
b8470add 1111 }
db486de5 1112
f26bf547 1113 if(t_pdl) {
db486de5
PL
1114 if(res == RCode::NoError) {
1115 auto i=ret.cbegin();
1116 for(; i!= ret.cend(); ++i)
1117 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
1118 break;
f26bf547 1119 if(i == ret.cend() && t_pdl->nodata(dq, res))
3ca4e735
PL
1120 shouldNotValidate = true;
1121
db486de5 1122 }
f26bf547 1123 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
3ca4e735 1124 shouldNotValidate = true;
db486de5 1125
f26bf547 1126 if(t_pdl->postresolve(dq, res))
3ca4e735 1127 shouldNotValidate = true;
db486de5
PL
1128 }
1129
b8470add
PL
1130 if (wantsRPZ) { //XXX This block is repeated, see above
1131 g_stats.policyResults[appliedPolicy.d_kind]++;
1132 switch(appliedPolicy.d_kind) {
1133 case DNSFilterEngine::PolicyKind::NoAction:
1134 break;
1135 case DNSFilterEngine::PolicyKind::Drop:
1136 g_stats.policyDrops++;
1137 delete dc;
1138 dc=0;
1139 return;
1140 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1141 ret.clear();
1142 res=RCode::NXDomain;
1143 goto haveAnswer;
1144
1145 case DNSFilterEngine::PolicyKind::NODATA:
1146 ret.clear();
1147 res=RCode::NoError;
1148 goto haveAnswer;
1149
1150 case DNSFilterEngine::PolicyKind::Truncate:
1151 if(!dc->d_tcp) {
1152 ret.clear();
1153 res=RCode::NoError;
1154 pw.getHeader()->tc=1;
1155 goto haveAnswer;
1156 }
1157 break;
1158
1159 case DNSFilterEngine::PolicyKind::Custom:
1160 ret.clear();
1161 res=RCode::NoError;
a9e029ee 1162 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
b8470add 1163 ret.push_back(spoofed);
53508135 1164 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
b8470add
PL
1165 goto haveAnswer;
1166 }
644dd1da 1167 }
4485aa35 1168 }
644dd1da 1169 haveAnswer:;
3e8216c8 1170 if(res == PolicyDecision::DROP) {
e9c2ad3a 1171 g_stats.policyDrops++;
ae7e77ad 1172 delete dc;
1173 dc=0;
1174 return;
3ddb9247 1175 }
9cdfab64 1176 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1dc8f4d0 1177 {
85ffbc53
PD
1178 string trace(sr.getTrace());
1179 if(!trace.empty()) {
1180 vector<string> lines;
1181 boost::split(lines, trace, boost::is_any_of("\n"));
1dc8f4d0 1182 for(const string& line : lines) {
85ffbc53 1183 if(!line.empty())
e6a9dde5 1184 g_log<<Logger::Warning<< line << endl;
85ffbc53
PD
1185 }
1186 }
1187 }
3ddb9247 1188
9cdfab64 1189 if(res == -1) {
0fe1d080
PD
1190 pw.getHeader()->rcode=RCode::ServFail;
1191 // no commit here, because no record
1192 g_stats.servFails++;
1193 }
288f4aa9 1194 else {
ea634573 1195 pw.getHeader()->rcode=res;
92011b8f 1196
f3fe4ae6 1197 // Does the validation mode or query demand validation?
0c43f455 1198 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
b25cae9a 1199 try {
f3fe4ae6 1200 if(sr.doLog()) {
e6a9dde5 1201 g_log<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<endl;
2e921ec6 1202 }
4d2be65d
RG
1203
1204 auto state = sr.getValidationState();
1205
b25cae9a 1206 if(state == Secure) {
2e921ec6 1207 if(sr.doLog()) {
e6a9dde5 1208 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates correctly"<<endl;
2e921ec6 1209 }
b25cae9a 1210
1211 // Is the query source interested in the value of the ad-bit?
885c8881 1212 if (dc->d_mdp.d_header.ad || DNSSECOK)
b25cae9a 1213 pw.getHeader()->ad=1;
1214 }
1215 else if(state == Insecure) {
f3fe4ae6 1216 if(sr.doLog()) {
e6a9dde5 1217 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Insecure"<<endl;
12ce523e 1218 }
b25cae9a 1219
1220 pw.getHeader()->ad=0;
f3fe4ae6 1221 }
b25cae9a 1222 else if(state == Bogus) {
c87e1876 1223 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
e6a9dde5 1224 g_log<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->getRemote()<<" validates as Bogus"<<endl;
b25cae9a 1225 }
1226
1227 // Does the query or validation mode sending out a SERVFAIL on validation errors?
885c8881 1228 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
b25cae9a 1229 if(sr.doLog()) {
e6a9dde5 1230 g_log<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
b25cae9a 1231 }
1232
1233 pw.getHeader()->rcode=RCode::ServFail;
1234 goto sendit;
1235 } else {
1236 if(sr.doLog()) {
e6a9dde5 1237 g_log<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
b25cae9a 1238 }
1239 }
1240 }
1241 }
1242 catch(ImmediateServFailException &e) {
1243 if(g_logCommonErrors)
e6a9dde5 1244 g_log<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
b25cae9a 1245 pw.getHeader()->rcode=RCode::ServFail;
1246 goto sendit;
f3fe4ae6 1247 }
b3f0ed10 1248 }
1249
c154c8a4 1250 if(ret.size()) {
92476c8b 1251 orderAndShuffle(ret);
5cc8371b 1252 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
20d84f77 1253 stable_sort(ret.begin(), ret.end(), *sl);
3e61e7f7 1254 variableAnswer=true;
1255 }
8e079f3a 1256 }
0afa32d4
RG
1257
1258 bool needCommit = false;
8e079f3a 1259 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
3e80ebce
KM
1260 if( ! DNSSECOK &&
1261 ( i->d_type == QType::NSEC3 ||
1262 (
1263 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1264 (
1265 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1266 i->d_place != DNSResourceRecord::ANSWER
1267 )
1268 )
1269 )
1270 ) {
2e921ec6 1271 continue;
3e80ebce
KM
1272 }
1273
70fb28d9 1274 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
97c6d7e5
RG
1275 needCommit = false;
1276 break;
1277 }
1278 needCommit = true;
1279
aa7929a3 1280#ifdef HAVE_PROTOBUF
63341e8d 1281 if(t_protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
d362f7c1 1282 pbMessage->addRR(*i);
aa7929a3
RG
1283 }
1284#endif
ea634573 1285 }
0afa32d4 1286 if(needCommit)
8e079f3a 1287 pw.commit();
288f4aa9 1288 }
10321a98 1289 sendit:;
b3f0ed10 1290
97c6d7e5
RG
1291 if (haveEDNS) {
1292 /* we try to add the EDNS OPT RR even for truncated answers,
1293 as rfc6891 states:
1294 "The minimal response MUST be the DNS header, question section, and an
1295 OPT record. This MUST also occur when a truncated response (using
1296 the DNS header's TC bit) is returned."
1297 */
1f691b94
PL
1298 DNSPacketWriter::optvect_t opts;
1299 if(wantsNSID) {
1300 const static string mode_server_id = ::arg()["server-id"];
1301 if(mode_server_id != "disabled" && !mode_server_id.empty()) {
1302 opts.push_back(make_pair(3, mode_server_id));
1303 variableAnswer = true; // Can't packetcache an answer with NSID
1304 }
97c6d7e5 1305 }
1f691b94
PL
1306 pw.addOpt(maxanswersize, 0, DNSSECOK ? EDNSOpts::DNSSECOK : 0, opts);
1307 pw.commit();
97c6d7e5
RG
1308 }
1309
79332bff 1310 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
5cc8371b 1311 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
aa7929a3 1312#ifdef HAVE_PROTOBUF
63341e8d 1313 if (t_protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
d362f7c1
RG
1314 pbMessage->setBytes(packet.size());
1315 pbMessage->setResponseCode(pw.getHeader()->rcode);
0a273054 1316 if (appliedPolicy.d_name) {
d362f7c1
RG
1317 pbMessage->setAppliedPolicy(*appliedPolicy.d_name);
1318 pbMessage->setAppliedPolicyType(appliedPolicy.d_type);
0a273054 1319 }
d362f7c1
RG
1320 pbMessage->setPolicyTags(dc->d_policyTags);
1321 pbMessage->setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1322 pbMessage->setRequestorId(dq.requestorId);
1323 pbMessage->setDeviceId(dq.deviceId);
63341e8d 1324 protobufLogResponse(t_protobufServer, *pbMessage);
aa7929a3
RG
1325 }
1326#endif
ea634573 1327 if(!dc->d_tcp) {
b71b60ee 1328 struct msghdr msgh;
1329 struct iovec iov;
1330 char cbuf[256];
1331 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
2c0af54f
PD
1332 msgh.msg_control=NULL;
1333
cbc03320 1334 if(g_fromtosockets.count(dc->d_socket)) {
fbe2a2e0 1335 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
2c0af54f 1336 }
cbc03320 1337 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1338 g_log<<Logger::Warning<<"Sending UDP reply to client "<<dc->getRemote()<<" failed with: "<<strerror(errno)<<endl;
70fb28d9 1339
3762e821 1340 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
e9f63d47 1341 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
76e2b9e3 1342 string((const char*)&*packet.begin(), packet.size()),
3ddb9247 1343 g_now.tv_sec,
76e2b9e3 1344 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
d9d3f9c1 1345 min(minTTL,SyncRes::s_packetcachettl),
d362f7c1 1346 pbMessage);
1051f8a9 1347 }
3762e821 1348 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
feccc9fc 1349 }
9c495589
BH
1350 else {
1351 char buf[2];
ea634573
BH
1352 buf[0]=packet.size()/256;
1353 buf[1]=packet.size()%256;
feccc9fc 1354
c038218b 1355 Utility::iovec iov[2];
feccc9fc 1356
ea634573
BH
1357 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1358 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 1359
dd079764 1360 int wret=Utility::writev(dc->d_socket, iov, 2);
0e9d9ce2 1361 bool hadError=true;
feccc9fc 1362
dd079764 1363 if(wret == 0)
e6a9dde5 1364 g_log<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
dd079764 1365 else if(wret < 0 )
e6a9dde5 1366 g_log<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
dd079764 1367 else if((unsigned int)wret != 2 + packet.size())
e6a9dde5 1368 g_log<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
0e9d9ce2 1369 else
18af64a8 1370 hadError=false;
3ddb9247 1371
09e6702a 1372 // update tcp connection status, either by closing or moving to 'BYTE0'
3ddb9247 1373
09e6702a 1374 if(hadError) {
18af64a8 1375 // no need to remove us from FDM, we weren't there
c36bc97a 1376 dc->d_socket = -1;
09e6702a 1377 }
a6ae6414 1378 else {
fde296a3
RG
1379 dc->d_tcpConnection->queriesCount++;
1380 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1381 dc->d_socket = -1;
1382 }
1383 else {
1384 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1385 Utility::gettimeofday(&g_now, 0); // needs to be updated
1386 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1387 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1388 }
0e9d9ce2 1389 }
9c495589 1390 }
2c9119cd 1391 float spent=makeFloat(sr.getNow()-dc->d_now);
1d5b3ce6 1392 if(!g_quiet) {
e6a9dde5
PL
1393 g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1394 g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
2c9119cd 1395 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1396 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1397
1398 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
e6a9dde5 1399 g_log<< ", dnssec="<<vStates[sr.getValidationState()];
2c9119cd 1400 }
1401
e6a9dde5 1402 g_log<<endl;
2c9119cd 1403
c75a6a9e 1404 }
b23b8614 1405
f7b8cffa
RG
1406 if (sr.d_outqueries || sr.d_authzonequeries) {
1407 t_RC->cacheMisses++;
1408 }
1409 else {
1410 t_RC->cacheHits++;
1411 }
2c9119cd 1412
fe213470
BH
1413 if(spent < 0.001)
1414 g_stats.answers0_1++;
1415 else if(spent < 0.010)
1416 g_stats.answers1_10++;
1417 else if(spent < 0.1)
1418 g_stats.answers10_100++;
1419 else if(spent < 1.0)
1420 g_stats.answers100_1000++;
1421 else
1422 g_stats.answersSlow++;
1423
574af7ea 1424 uint64_t newLat=(uint64_t)(spent*1000000);
b841314c 1425 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
08f3f638 1426 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
0a6b1027 1427 // no worries, we do this for packet cache hits elsewhere
19178da9 1428
1429 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1430 if(ourtime < 1)
1431 g_stats.ourtime0_1++;
1432 else if(ourtime < 2)
1433 g_stats.ourtime1_2++;
1434 else if(ourtime < 4)
1435 g_stats.ourtime2_4++;
1436 else if(ourtime < 8)
1437 g_stats.ourtime4_8++;
1438 else if(ourtime < 16)
1439 g_stats.ourtime8_16++;
1440 else if(ourtime < 32)
1441 g_stats.ourtime16_32++;
1442 else {
1443 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1444 g_stats.ourtimeSlow++;
1445 }
042da1a1 1446 if(ourtime >= 0.0) {
1447 newLat=ourtime*1000; // usec
1448 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1449 }
c6d04bdc 1450 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
ea634573 1451 delete dc;
c36bc97a 1452 dc=0;
288f4aa9 1453 }
3f81d239 1454 catch(PDNSException &ae) {
e6a9dde5 1455 g_log<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
c36bc97a 1456 delete dc;
288f4aa9 1457 }
7b1469bb 1458 catch(MOADNSException& e) {
e6a9dde5 1459 g_log<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
c36bc97a 1460 delete dc;
7b1469bb 1461 }
fdbf35ac 1462 catch(std::exception& e) {
e6a9dde5 1463 g_log<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
068c7634
PD
1464
1465 // Luawrapper nests the exception from Lua, so we unnest it here
1466 try {
1467 std::rethrow_if_nested(e);
2010ac95 1468 } catch(const std::exception& ne) {
e6a9dde5 1469 g_log<<". Extra info: "<<ne.what();
068c7634
PD
1470 } catch(...) {}
1471
e6a9dde5 1472 g_log<<endl;
c36bc97a 1473 delete dc;
c154c8a4 1474 }
288f4aa9 1475 catch(...) {
e6a9dde5 1476 g_log<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
288f4aa9 1477 }
3ddb9247 1478
ec6eacbc 1479 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
288f4aa9
BH
1480}
1481
d187038c 1482static void makeControlChannelSocket(int processNum=-1)
1d5b3ce6 1483{
2d733c0f 1484 string sockname=::arg()["socket-dir"]+"/"+s_programname;
677e2a46 1485 if(processNum >= 0)
335da0ba 1486 sockname += "."+std::to_string(processNum);
677e2a46 1487 sockname+=".controlsocket";
41f7a068 1488 s_rcc.listen(sockname);
3ddb9247 1489
387de317
BH
1490 int sockowner = -1;
1491 int sockgroup = -1;
1492
1493 if (!::arg().isEmpty("socket-group"))
1494 sockgroup=::arg().asGid("socket-group");
1495 if (!::arg().isEmpty("socket-owner"))
1496 sockowner=::arg().asUid("socket-owner");
3ddb9247 1497
f838ad8d
BH
1498 if (sockgroup > -1 || sockowner > -1) {
1499 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1500 unixDie("Failed to chown control socket");
1501 }
1502 }
387de317
BH
1503
1504 // do mode change if socket-mode is given
1505 if(!::arg().isEmpty("socket-mode")) {
1506 mode_t sockmode=::arg().asMode("socket-mode");
34c513f9
RG
1507 if(chmod(sockname.c_str(), sockmode) < 0) {
1508 unixDie("Failed to chmod control socket");
1509 }
387de317 1510 }
1d5b3ce6
BH
1511}
1512
5cc8371b
RG
1513static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1514 bool& foundECS, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options,
1515 bool& foundXPF, ComboAddress* xpfSource, ComboAddress* xpfDest)
02b47f43 1516{
59cb4a79 1517 const bool lookForXPF = xpfSource != nullptr && g_xpfRRCode != 0;
5cc8371b
RG
1518 const bool lookForECS = ednssubnet != nullptr;
1519 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(question.c_str());
02b47f43
RG
1520 size_t questionLen = question.length();
1521 unsigned int consumed=0;
1522 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1523
1524 size_t pos= sizeof(dnsheader)+consumed+4;
5cc8371b
RG
1525 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1526 const uint16_t arcount = ntohs(dh->arcount);
1527
1528 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && ((lookForECS && !foundECS) || (lookForXPF && !foundXPF)); arpos++) {
1529 if (question.at(pos) != 0) {
1530 /* not an OPT or a XPF, bye. */
1531 return;
1532 }
1533
1534 pos += 1;
1535 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1536 pos += sizeof(dnsrecordheader);
1537
1538 if (pos >= questionLen) {
1539 return;
1540 }
1541
02b47f43 1542 /* OPT root label (1) followed by type (2) */
5cc8371b 1543 if(lookForECS && ntohs(drh->d_type) == QType::OPT) {
00b8cadc
RG
1544 if (!options) {
1545 char* ecsStart = nullptr;
1546 size_t ecsLen = 0;
5cc8371b
RG
1547 /* we need to pass the record len */
1548 int res = getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen)))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStart, &ecsLen);
00b8cadc
RG
1549 if (res == 0 && ecsLen > 4) {
1550 EDNSSubnetOpts eso;
1551 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1552 *ednssubnet=eso;
5cc8371b 1553 foundECS = true;
00b8cadc
RG
1554 }
1555 }
1556 }
1557 else {
5cc8371b
RG
1558 /* we need to pass the record len */
1559 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos -sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
00b8cadc
RG
1560 if (res == 0) {
1561 const auto& it = options->find(EDNSOptionCode::ECS);
1562 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1563 EDNSSubnetOpts eso;
1564 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1565 *ednssubnet=eso;
5cc8371b 1566 foundECS = true;
00b8cadc
RG
1567 }
1568 }
02b47f43
RG
1569 }
1570 }
1571 }
59cb4a79 1572 else if (lookForXPF && ntohs(drh->d_type) == g_xpfRRCode && ntohs(drh->d_class) == QClass::IN && drh->d_ttl == 0) {
5cc8371b
RG
1573 if ((questionLen - pos) < ntohs(drh->d_clen)) {
1574 return;
1575 }
1576
1577 foundXPF = parseXPFPayload(reinterpret_cast<const char*>(&question.at(pos)), ntohs(drh->d_clen), *xpfSource, xpfDest);
1578 }
1579
1580 pos += ntohs(drh->d_clen);
02b47f43
RG
1581 }
1582}
1583
d187038c 1584static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 1585{
cd989c87 1586 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
c038218b 1587
879b3f70 1588 if(conn->state==TCPConnection::BYTE0) {
b841314c 1589 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
09e6702a 1590 if(bytes==1)
667f7e60 1591 conn->state=TCPConnection::BYTE1;
3ddb9247 1592 if(bytes==2) {
a0aa4f64 1593 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60
BH
1594 conn->bytesread=0;
1595 conn->state=TCPConnection::GETQUESTION;
09e6702a
BH
1596 }
1597 if(!bytes || bytes < 0) {
bb4bdbaf 1598 t_fdm->removeReadFD(fd);
09e6702a
BH
1599 return;
1600 }
1601 }
667f7e60 1602 else if(conn->state==TCPConnection::BYTE1) {
b841314c 1603 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
09e6702a 1604 if(bytes==1) {
667f7e60 1605 conn->state=TCPConnection::GETQUESTION;
a0aa4f64 1606 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
667f7e60 1607 conn->bytesread=0;
09e6702a
BH
1608 }
1609 if(!bytes || bytes < 0) {
1610 if(g_logCommonErrors)
e6a9dde5 1611 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected after first byte"<<endl;
bb4bdbaf 1612 t_fdm->removeReadFD(fd);
09e6702a
BH
1613 return;
1614 }
1615 }
667f7e60 1616 else if(conn->state==TCPConnection::GETQUESTION) {
b841314c 1617 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
f9d67b41 1618 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
e6a9dde5 1619 g_log<<Logger::Error<<"TCP client "<< conn->d_remote.toStringWithPort() <<" disconnected while reading question body"<<endl;
bb4bdbaf 1620 t_fdm->removeReadFD(fd);
09e6702a
BH
1621 return;
1622 }
b841314c 1623 conn->bytesread+=(uint16_t)bytes;
667f7e60 1624 if(conn->bytesread==conn->qlen) {
bb4bdbaf 1625 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
879b3f70 1626
f26bf547 1627 DNSComboWriter* dc=nullptr;
09e6702a 1628 try {
cd989c87 1629 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
09e6702a
BH
1630 }
1631 catch(MOADNSException &mde) {
3ddb9247 1632 g_stats.clientParseError++;
4957a608 1633 if(g_logCommonErrors)
e6a9dde5 1634 g_log<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 1635 return;
09e6702a 1636 }
cd989c87
BH
1637 dc->d_tcpConnection = conn; // carry the torch
1638 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
09e6702a 1639 dc->d_tcp=true;
5cc8371b
RG
1640 dc->setRemote(conn->d_remote);
1641 dc->setSource(conn->d_remote);
a6147cd2 1642 ComboAddress dest;
d38e2ba9 1643 dest.reset();
a6147cd2 1644 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1645 socklen_t len = dest.getSocklen();
1646 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1647 dc->setLocal(dest);
5cc8371b 1648 dc->setDestination(dest);
33dcceba
RG
1649 DNSName qname;
1650 uint16_t qtype=0;
1651 uint16_t qclass=0;
1652 bool needECS = false;
5cc8371b 1653 bool needXPF = g_XPFAcl.match(conn->d_remote);
67e31ebe 1654 string requestorId;
590388d2 1655 string deviceId;
aa7929a3 1656#ifdef HAVE_PROTOBUF
02b47f43 1657 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1658 if (checkProtobufExport(luaconfsLocal)) {
33dcceba
RG
1659 needECS = true;
1660 }
1661#endif
1662
70fb28d9 1663 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
33dcceba
RG
1664
1665 try {
00b8cadc 1666 std::map<uint16_t, EDNSOptionView> ednsOptions;
5cc8371b 1667 bool xpfFound = false;
b40562da 1668 dc->d_ecsParsed = true;
5cc8371b
RG
1669 dc->d_ecsFound = false;
1670 getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass,
1671 dc->d_ecsFound, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1672 xpfFound, needXPF ? &dc->d_source : nullptr, needXPF ? &dc->d_destination : nullptr);
02b47f43 1673
70fb28d9 1674 if(t_pdl) {
33dcceba 1675 try {
70fb28d9
RG
1676 if (t_pdl->d_gettag_ffi) {
1677 dc->d_tag = t_pdl->gettag_ffi(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable);
1678 }
1679 else if (t_pdl->d_gettag) {
1680 dc->d_tag = t_pdl->gettag(dc->d_source, dc->d_ednssubnet.source, dc->d_destination, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1681 }
33dcceba 1682 }
70fb28d9 1683 catch(const std::exception& e) {
33dcceba 1684 if(g_logCommonErrors)
e6a9dde5 1685 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1686 }
1687 }
1688 }
70fb28d9 1689 catch(const std::exception& e)
33dcceba
RG
1690 {
1691 if(g_logCommonErrors)
e6a9dde5 1692 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
33dcceba
RG
1693 }
1694 }
1695#ifdef HAVE_PROTOBUF
63341e8d 1696 if(t_protobufServer || t_outgoingProtobufServer) {
67e31ebe 1697 dc->d_requestorId = requestorId;
590388d2 1698 dc->d_deviceId = deviceId;
02b47f43 1699 dc->d_uuid = (*t_uuidGenerator)();
4898a348 1700 }
02b47f43 1701
63341e8d 1702 if(t_protobufServer) {
02b47f43 1703 try {
02b47f43 1704 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
02b47f43 1705
b790ef3d 1706 if (!luaconfsLocal->protobufTaggedOnly) {
63341e8d 1707 protobufLogQuery(t_protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, dc->d_source, dc->d_destination, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
b790ef3d 1708 }
02b47f43
RG
1709 }
1710 catch(std::exception& e) {
1711 if(g_logCommonErrors)
e6a9dde5 1712 g_log<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
02b47f43
RG
1713 }
1714 }
aa7929a3 1715#endif
879b3f70 1716 if(dc->d_mdp.d_header.qr) {
048f5db6 1717 g_stats.ignoredCount++;
e6a9dde5 1718 g_log<<Logger::Error<<"Ignoring answer from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
cf14c141 1719 delete dc;
4957a608 1720 return;
879b3f70 1721 }
3abcdab2 1722 if(dc->d_mdp.d_header.opcode) {
048f5db6 1723 g_stats.ignoredCount++;
e6a9dde5 1724 g_log<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< dc->getRemote() <<" on server socket!"<<endl;
cf14c141 1725 delete dc;
3abcdab2
PD
1726 return;
1727 }
09e6702a 1728 else {
4957a608
BH
1729 ++g_stats.qcounter;
1730 ++g_stats.tcpqcounter;
50a5ef72 1731 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
4957a608 1732 return;
09e6702a
BH
1733 }
1734 }
1735 }
1736}
1737
6dcd28c3 1738//! Handle new incoming TCP connection
d187038c 1739static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
09e6702a 1740{
37d3f960 1741 ComboAddress addr;
09e6702a 1742 socklen_t addrlen=sizeof(addr);
a683e8bd 1743 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
b841314c 1744 if(newsock>=0) {
85c32340
BH
1745 if(MT->numProcesses() > g_maxMThreads) {
1746 g_stats.overCapacityDrops++;
a7b68ae7
RG
1747 try {
1748 closesocket(newsock);
1749 }
1750 catch(const PDNSException& e) {
e6a9dde5 1751 g_log<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
a7b68ae7 1752 }
85c32340
BH
1753 return;
1754 }
1755
92011b8f 1756 if(t_remotes)
1757 t_remotes->push_back(addr);
49a699c4 1758 if(t_allowFrom && !t_allowFrom->match(&addr)) {
3ddb9247 1759 if(!g_quiet)
e6a9dde5 1760 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
2914b022 1761
09e6702a 1762 g_stats.unauthorizedTCP++;
a7b68ae7
RG
1763 try {
1764 closesocket(newsock);
1765 }
1766 catch(const PDNSException& e) {
e6a9dde5 1767 g_log<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
a7b68ae7 1768 }
09e6702a
BH
1769 return;
1770 }
bd0289fc 1771 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
09e6702a 1772 g_stats.tcpClientOverflow++;
a7b68ae7
RG
1773 try {
1774 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1775 }
1776 catch(const PDNSException& e) {
e6a9dde5 1777 g_log<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
a7b68ae7 1778 }
09e6702a
BH
1779 return;
1780 }
3ddb9247 1781
3897b9e1 1782 setNonBlocking(newsock);
f26bf547 1783 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
cd989c87 1784 tc->state=TCPConnection::BYTE0;
3ddb9247 1785
cd989c87 1786 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
c038218b 1787
0bff046b 1788 struct timeval now;
c038218b 1789 Utility::gettimeofday(&now, 0);
cd989c87 1790 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
09e6702a
BH
1791 }
1792}
3ddb9247 1793
d187038c 1794static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1bc3c142 1795{
183eb877 1796 gettimeofday(&g_now, 0);
b71b60ee 1797 struct timeval diff = g_now - tv;
1798 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
183eb877 1799
22cf1fda 1800 if(tv.tv_sec && delta > 1000.0) {
b71b60ee 1801 g_stats.tooOldDrops++;
1802 return 0;
1803 }
1804
1bc3c142 1805 ++g_stats.qcounter;
d7f10541
BH
1806 if(fromaddr.sin4.sin_family==AF_INET6)
1807 g_stats.ipv6qcounter++;
1bc3c142
BH
1808
1809 string response;
93f0da94 1810 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
49a3500d 1811 unsigned int ctag=0;
f57486f1 1812 uint32_t qhash = 0;
12aff2e5 1813 bool needECS = false;
5cc8371b 1814 bool needXPF = g_XPFAcl.match(fromaddr);
02b47f43 1815 std::vector<std::string> policyTags;
5fd2577f 1816 LuaContext::LuaObject data;
5cc8371b
RG
1817 ComboAddress source = fromaddr;
1818 ComboAddress destination = destaddr;
67e31ebe 1819 string requestorId;
590388d2 1820 string deviceId;
12aff2e5 1821#ifdef HAVE_PROTOBUF
02b47f43 1822 boost::uuids::uuid uniqueId;
02b47f43 1823 auto luaconfsLocal = g_luaconfs.getLocal();
63341e8d 1824 if (checkProtobufExport(luaconfsLocal)) {
4898a348 1825 uniqueId = (*t_uuidGenerator)();
02b47f43 1826 needECS = true;
63341e8d 1827 } else if (checkOutgoingProtobufExport(luaconfsLocal)) {
02b47f43
RG
1828 uniqueId = (*t_uuidGenerator)();
1829 }
12aff2e5 1830#endif
b40562da
RG
1831 EDNSSubnetOpts ednssubnet;
1832 bool ecsFound = false;
1833 bool ecsParsed = false;
70fb28d9
RG
1834 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
1835 bool variable = false;
1bc3c142 1836 try {
02b47f43
RG
1837 DNSName qname;
1838 uint16_t qtype=0;
1839 uint16_t qclass=0;
1bc3c142 1840 uint32_t age;
c15ff3df 1841 bool qnameParsed=false;
8f7473d7 1842#ifdef MALLOC_TRACE
1843 /*
1844 static uint64_t last=0;
1845 if(!last)
1846 g_mtracer->clearAllocators();
1847 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1848 last=g_mtracer->getAllocs();
1849 cout<<g_mtracer->topAllocatorsString()<<endl;
1850 g_mtracer->clearAllocators();
1851 */
1852#endif
55a1378f 1853
70fb28d9 1854 if(needECS || needXPF || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
b2eacd67 1855 try {
00b8cadc 1856 std::map<uint16_t, EDNSOptionView> ednsOptions;
5cc8371b
RG
1857 bool xpfFound = false;
1858
1859 ecsFound = false;
1860
1861 getQNameAndSubnet(question, &qname, &qtype, &qclass,
1862 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr,
1863 xpfFound, needXPF ? &source : nullptr, needXPF ? &destination : nullptr);
1864
c15ff3df
RG
1865 qnameParsed = true;
1866 ecsParsed = true;
12aff2e5 1867
70fb28d9 1868 if(t_pdl) {
12aff2e5 1869 try {
70fb28d9
RG
1870 if (t_pdl->d_gettag_ffi) {
1871 ctag = t_pdl->gettag_ffi(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable);
1872 }
1873 else if (t_pdl->d_gettag) {
1874 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
1875 }
12aff2e5 1876 }
70fb28d9 1877 catch(const std::exception& e) {
12aff2e5 1878 if(g_logCommonErrors)
e6a9dde5 1879 g_log<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 1880 }
8ea8c302 1881 }
b2eacd67 1882 }
70fb28d9 1883 catch(const std::exception& e)
b2eacd67 1884 {
1885 if(g_logCommonErrors)
e6a9dde5 1886 g_log<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
12aff2e5 1887 }
12ce523e 1888 }
3ddb9247 1889
02b47f43 1890 bool cacheHit = false;
1fbc6dc5 1891 boost::optional<RecProtoBufMessage> pbMessage(boost::none);
02b47f43 1892#ifdef HAVE_PROTOBUF
63341e8d 1893 if(t_protobufServer) {
d362f7c1 1894 pbMessage = RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
b790ef3d 1895 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
63341e8d 1896 protobufLogQuery(t_protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, source, destination, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
b790ef3d 1897 }
d9d3f9c1
RG
1898 }
1899#endif /* HAVE_PROTOBUF */
02b47f43 1900
70fb28d9
RG
1901 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
1902 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
1903 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
c15ff3df 1904 if (qnameParsed) {
d362f7c1 1905 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
1906 }
1907 else {
d362f7c1 1908 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, pbMessage ? &(*pbMessage) : nullptr));
c15ff3df
RG
1909 }
1910
d9d3f9c1
RG
1911 if (cacheHit) {
1912#ifdef HAVE_PROTOBUF
63341e8d 1913 if(t_protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage->getAppliedPolicy().empty() || !pbMessage->getPolicyTags().empty())) {
5cc8371b 1914 Netmask requestorNM(source, source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
e1c8a4bb 1915 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
d362f7c1
RG
1916 pbMessage->update(uniqueId, &requestor, &destination, false, dh->id);
1917 pbMessage->setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1918 pbMessage->setQueryTime(g_now.tv_sec, g_now.tv_usec);
1919 pbMessage->setRequestorId(requestorId);
1920 pbMessage->setDeviceId(deviceId);
63341e8d 1921 protobufLogResponse(t_protobufServer, *pbMessage);
02b47f43 1922 }
d9d3f9c1 1923#endif /* HAVE_PROTOBUF */
49a3500d 1924 if(!g_quiet)
e6a9dde5 1925 g_log<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<endl;
8f7473d7 1926
1bc3c142
BH
1927 g_stats.packetCacheHits++;
1928 SyncRes::s_queries++;
1929 ageDNSPacket(response, age);
b71b60ee 1930 struct msghdr msgh;
1931 struct iovec iov;
1932 char cbuf[256];
1933 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2c0af54f
PD
1934 msgh.msg_control=NULL;
1935
cbc03320 1936 if(g_fromtosockets.count(fd)) {
fbe2a2e0 1937 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
b71b60ee 1938 }
cbc03320 1939 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
e6a9dde5 1940 g_log<<Logger::Warning<<"Sending UDP reply to client "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" failed with: "<<strerror(errno)<<endl;
b71b60ee 1941
97bee66d 1942 if(response.length() >= sizeof(struct dnsheader)) {
dd079764
RG
1943 struct dnsheader tmpdh;
1944 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
5cc8371b 1945 updateResponseStats(tmpdh.rcode, source, response.length(), 0, 0);
97bee66d 1946 }
08f3f638 1947 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
19178da9 1948 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1bc3c142
BH
1949 return 0;
1950 }
3ddb9247 1951 }
1bc3c142 1952 catch(std::exception& e) {
e6a9dde5 1953 g_log<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1bc3c142
BH
1954 return 0;
1955 }
3ddb9247 1956
f26bf547 1957 if(t_pdl) {
5cc8371b 1958 if(t_pdl->ipfilter(source, destination, *dh)) {
4ea94941 1959 if(!g_quiet)
e6a9dde5 1960 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<" based on policy"<<endl;
4ea94941 1961 g_stats.policyDrops++;
1962 return 0;
1963 }
1964 }
1965
1bc3c142 1966 if(MT->numProcesses() > g_maxMThreads) {
461df9d2 1967 if(!g_quiet)
e6a9dde5 1968 g_log<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<source.toStringWithPort()<<(source != fromaddr ? " (via "+fromaddr.toStringWithPort()+")" : "")<<", over capacity"<<endl;
461df9d2 1969
1bc3c142
BH
1970 g_stats.overCapacityDrops++;
1971 return 0;
1972 }
3ddb9247 1973
5164bac3 1974 DNSComboWriter* dc = new DNSComboWriter(question, g_now, std::move(policyTags), std::move(data));
1bc3c142 1975 dc->setSocket(fd);
49a3500d 1976 dc->d_tag=ctag;
e9f63d47 1977 dc->d_qhash=qhash;
5cc8371b
RG
1978 dc->setRemote(fromaddr);
1979 dc->setSource(source);
b71b60ee 1980 dc->setLocal(destaddr);
5cc8371b 1981 dc->setDestination(destination);
1bc3c142 1982 dc->d_tcp=false;
b40562da
RG
1983 dc->d_ecsFound = ecsFound;
1984 dc->d_ecsParsed = ecsParsed;
1985 dc->d_ednssubnet = ednssubnet;
70fb28d9
RG
1986 dc->d_ttlCap = ttlCap;
1987 dc->d_variable = variable;
aa7929a3 1988#ifdef HAVE_PROTOBUF
63341e8d 1989 if (t_protobufServer || t_outgoingProtobufServer) {
5164bac3 1990 dc->d_uuid = std::move(uniqueId);
d9d3f9c1 1991 }
67e31ebe 1992 dc->d_requestorId = requestorId;
590388d2 1993 dc->d_deviceId = deviceId;
aa7929a3
RG
1994#endif
1995
1bc3c142
BH
1996 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1997 return 0;
3ddb9247
PD
1998}
1999
b71b60ee 2000
d187038c 2001static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
5db529f8 2002{
a683e8bd 2003 ssize_t len;
04896b99 2004 static thread_local std::string data;
5db529f8 2005 ComboAddress fromaddr;
b71b60ee 2006 struct msghdr msgh;
2007 struct iovec iov;
2008 char cbuf[256];
390f1dab 2009 bool firstQuery = true;
b71b60ee 2010
04896b99 2011 data.resize(1500);
b71b60ee 2012 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
04896b99 2013 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
b71b60ee 2014
a5886e6a 2015 for(size_t counter = 0; counter < s_maxUDPQueriesPerRound; counter++)
b71b60ee 2016 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
390f1dab
RG
2017
2018 firstQuery = false;
2019
04896b99
RG
2020 if (static_cast<size_t>(len) < sizeof(dnsheader)) {
2021 g_stats.ignoredCount++;
2022 if (!g_quiet) {
2023 g_log<<Logger::Error<<"Ignoring too-short ("<<std::to_string(len)<<") query from "<<fromaddr.toString()<<endl;
2024 }
2025 return;
2026 }
2027
92011b8f 2028 if(t_remotes)
2029 t_remotes->push_back(fromaddr);
b23b8614 2030
49a699c4 2031 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
3ddb9247 2032 if(!g_quiet)
e6a9dde5 2033 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
2914b022 2034
5db529f8 2035 g_stats.unauthorizedUDP++;
a9af3782 2036 return;
5db529f8 2037 }
15c01deb 2038 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
81859ba5 2039 if(!fromaddr.sin4.sin_port) { // also works for IPv6
3ddb9247 2040 if(!g_quiet)
e6a9dde5 2041 g_log<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
81859ba5 2042
2043 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2044 return;
2045 }
5db529f8 2046 try {
04896b99
RG
2047 data.resize(static_cast<size_t>(len));
2048 dnsheader* dh=(dnsheader*)&data[0];
3ddb9247 2049
b23b8614 2050 if(dh->qr) {
048f5db6 2051 g_stats.ignoredCount++;
4957a608 2052 if(g_logCommonErrors)
e6a9dde5 2053 g_log<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
5db529f8 2054 }
3abcdab2 2055 else if(dh->opcode) {
048f5db6 2056 g_stats.ignoredCount++;
3abcdab2 2057 if(g_logCommonErrors)
e6a9dde5 2058 g_log<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
3abcdab2 2059 }
5db529f8 2060 else {
b71b60ee 2061 struct timeval tv={0,0};
2062 HarvestTimestamp(&msgh, &tv);
2063 ComboAddress dest;
d38e2ba9 2064 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
a6147cd2 2065 auto loc = rplookup(g_listenSocketsAddresses, fd);
2066 if(HarvestDestinationAddress(&msgh, &dest)) {
2067 // but.. need to get port too
2068 if(loc)
2069 dest.sin4.sin_port = loc->sin4.sin_port;
2070 }
2071 else {
2072 if(loc) {
2073 dest = *loc;
2074 }
2075 else {
2076 dest.sin4.sin_family = fromaddr.sin4.sin_family;
a683e8bd
RG
2077 socklen_t slen = dest.getSocklen();
2078 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
a6147cd2 2079 }
2080 }
232f0877 2081 if(g_weDistributeQueries)
04896b99 2082 distributeAsyncFunction(data, boost::bind(doProcessUDPQuestion, data, fromaddr, dest, tv, fd));
232f0877 2083 else
04896b99 2084 doProcessUDPQuestion(data, fromaddr, dest, tv, fd);
5db529f8
BH
2085 }
2086 }
2087 catch(MOADNSException& mde) {
3ddb9247 2088 g_stats.clientParseError++;
84e66a59 2089 if(g_logCommonErrors)
e6a9dde5 2090 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
5db529f8 2091 }
0b602819
KM
2092 catch(std::runtime_error& e) {
2093 g_stats.clientParseError++;
2094 if(g_logCommonErrors)
e6a9dde5 2095 g_log<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
5db529f8
BH
2096 }
2097 }
ac0e821b
BH
2098 else {
2099 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
390f1dab 2100 if(firstQuery && errno == EAGAIN)
9326cae1 2101 g_stats.noPacketError++;
390f1dab 2102
bf3b0cec 2103 break;
ac0e821b 2104 }
5db529f8
BH
2105}
2106
810ff705 2107static void makeTCPServerSockets(unsigned int threadId)
9c495589 2108{
37d3f960 2109 int fd;
f28307ad 2110 vector<string>locals;
2e3d8a19 2111 stringtok(locals,::arg()["local-address"]," ,");
9c495589 2112
f28307ad 2113 if(locals.empty())
3f81d239 2114 throw PDNSException("No local address specified");
3ddb9247 2115
f28307ad 2116 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2117 ServiceTuple st;
2118 st.port=::arg().asNum("local-port");
2119 parseService(*i, st);
3ddb9247 2120
32252594
BH
2121 ComboAddress sin;
2122
d38e2ba9 2123 sin.reset();
37d3f960 2124 sin.sin4.sin_family = AF_INET;
32252594 2125 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2126 sin.sin6.sin6_family = AF_INET6;
f71bc087 2127 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2128 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
37d3f960
BH
2129 }
2130
2131 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
3ddb9247 2132 if(fd<0)
3f81d239 2133 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
f28307ad 2134
3897b9e1 2135 setCloseOnExec(fd);
a903b39c 2136
f28307ad 2137 int tmp=1;
810ff705 2138 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
e6a9dde5 2139 g_log<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 2140 exit(1);
f28307ad 2141 }
0dfa94ab 2142 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 2143 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2144 }
2145
c8ddb7c2 2146#ifdef TCP_DEFER_ACCEPT
38ac0821 2147 if(setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
37d3f960 2148 if(i==locals.begin())
e6a9dde5 2149 g_log<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
2150 }
2151#endif
2152
fec7dd5a
SS
2153 if( ::arg().mustDo("non-local-bind") )
2154 Utility::setBindAny(AF_INET, fd);
2155
2332f42d 2156#ifdef SO_REUSEPORT
810ff705
RG
2157 if(g_reusePort) {
2158 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
2332f42d 2159 throw PDNSException("SO_REUSEPORT: "+stringerror());
2160 }
2161#endif
2162
0735b17e
RG
2163 if (::arg().asNum("tcp-fast-open") > 0) {
2164#ifdef TCP_FASTOPEN
2165 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
2166 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 2167 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
0735b17e
RG
2168 }
2169#else
e6a9dde5 2170 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
0735b17e
RG
2171#endif
2172 }
2173
32252594 2174 sin.sin4.sin_port = htons(st.port);
a683e8bd 2175 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
3ddb9247 2176 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
3f81d239 2177 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
3ddb9247 2178
3897b9e1 2179 setNonBlocking(fd);
49a699c4 2180 setSocketSendBuffer(fd, 65000);
37d3f960 2181 listen(fd, 128);
810ff705 2182 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
c2136bf0 2183 g_tcpListenSockets.push_back(fd);
84433b79 2184 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2185 // - fd is not that which we know here, but returned from accept()
3ddb9247 2186 if(sin.sin4.sin_family == AF_INET)
e6a9dde5 2187 g_log<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2188 else
e6a9dde5 2189 g_log<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2190 }
9c495589
BH
2191}
2192
810ff705 2193static void makeUDPServerSockets(unsigned int threadId)
288f4aa9 2194{
fec7dd5a 2195 int one=1;
f28307ad 2196 vector<string>locals;
2e3d8a19 2197 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 2198
f28307ad 2199 if(locals.empty())
3f81d239 2200 throw PDNSException("No local address specified");
3ddb9247 2201
f28307ad 2202 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
32252594
BH
2203 ServiceTuple st;
2204 st.port=::arg().asNum("local-port");
2205 parseService(*i, st);
2206
37d3f960 2207 ComboAddress sin;
996c89cc 2208
d38e2ba9 2209 sin.reset();
37d3f960 2210 sin.sin4.sin_family = AF_INET;
32252594 2211 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
37d3f960 2212 sin.sin6.sin6_family = AF_INET6;
f71bc087 2213 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
3ddb9247 2214 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
37d3f960 2215 }
3ddb9247 2216
bb4bdbaf 2217 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
d3b4137e 2218 if(fd < 0) {
3f81d239 2219 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
d3b4137e 2220 }
915b0c39 2221 if (!setSocketTimestamps(fd))
e6a9dde5 2222 g_log<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
0dfa94ab 2223
b71b60ee 2224 if(IsAnyAddress(sin)) {
cbc03320 2225 if(sin.sin4.sin_family == AF_INET)
2226 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2227 g_fromtosockets.insert(fd);
757d3179 2228#ifdef IPV6_RECVPKTINFO
cbc03320 2229 if(sin.sin4.sin_family == AF_INET6)
2230 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2231 g_fromtosockets.insert(fd);
757d3179 2232#endif
0dfa94ab 2233 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
e6a9dde5 2234 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
0dfa94ab 2235 }
b71b60ee 2236 }
fec7dd5a
SS
2237 if( ::arg().mustDo("non-local-bind") )
2238 Utility::setBindAny(AF_INET6, fd);
2239
3897b9e1 2240 setCloseOnExec(fd);
a903b39c 2241
4e9a20e6 2242 setSocketReceiveBuffer(fd, 250000);
32252594 2243 sin.sin4.sin_port = htons(st.port);
37d3f960 2244
2332f42d 2245
2573d4a6 2246#ifdef SO_REUSEPORT
810ff705 2247 if(g_reusePort) {
2332f42d 2248 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2249 throw PDNSException("SO_REUSEPORT: "+stringerror());
2250 }
2251#endif
a683e8bd 2252 socklen_t socklen=sin.getSocklen();
3ddb9247 2253 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
335da0ba 2254 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
3ddb9247 2255
3897b9e1 2256 setNonBlocking(fd);
c2136bf0 2257
810ff705 2258 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
40a3dd64 2259 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
3ddb9247 2260 if(sin.sin4.sin_family == AF_INET)
e6a9dde5 2261 g_log<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
aa136564 2262 else
e6a9dde5 2263 g_log<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
f28307ad 2264 }
c836dc19 2265}
caa6eefa 2266
d187038c 2267static void daemonize(void)
c836dc19
BH
2268{
2269 if(fork())
2270 exit(0); // bye bye
3ddb9247
PD
2271
2272 setsid();
c836dc19 2273
27a5ead5 2274 int i=open("/dev/null",O_RDWR); /* open stdin */
3ddb9247 2275 if(i < 0)
e6a9dde5 2276 g_log<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
27a5ead5
BH
2277 else {
2278 dup2(i,0); /* stdin */
2279 dup2(i,1); /* stderr */
2280 dup2(i,2); /* stderr */
2281 close(i);
2282 }
288f4aa9 2283}
caa6eefa 2284
d187038c 2285static void usr1Handler(int)
c75a6a9e
BH
2286{
2287 statsWanted=true;
2288}
ae1b2e98 2289
d187038c 2290static void usr2Handler(int)
9170fbaf 2291{
f1f34cc2 2292 g_quiet= !g_quiet;
2293 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2294 ::arg().set("quiet")=g_quiet ? "" : "no";
9170fbaf
BH
2295}
2296
d187038c 2297static void doStats(void)
c75a6a9e 2298{
16beeaa4
BH
2299 static time_t lastOutputTime;
2300 static uint64_t lastQueryCount;
d299d4f5 2301
2302 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2303 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
3ddb9247 2304
d299d4f5 2305 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
e6a9dde5 2306 g_log<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
3427fa8a
BH
2307 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2308 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
3ddb9247
PD
2309 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2310
e6a9dde5 2311 g_log<<Logger::Notice<<"stats: throttle map: "
3427fa8a 2312 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
3ddb9247 2313 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
e6a9dde5
PL
2314 g_log<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2315 g_log<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
525b8a7c 2316 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
e6a9dde5 2317 g_log<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
3427fa8a 2318 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
81883dcc 2319
e6a9dde5 2320 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
16beeaa4 2321 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3ddb9247 2322
e6a9dde5 2323 g_log<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
16beeaa4 2324 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
3ddb9247 2325
16beeaa4
BH
2326 time_t now = time(0);
2327 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
e6a9dde5 2328 g_log<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
16beeaa4
BH
2329 }
2330 lastOutputTime = now;
2331 lastQueryCount = SyncRes::s_queries;
c75a6a9e 2332 }
3ddb9247 2333 else if(statsWanted)
e6a9dde5 2334 g_log<<Logger::Notice<<"stats: no stats yet!"<<endl;
7becf07f 2335
c75a6a9e
BH
2336 statsWanted=false;
2337}
c836dc19 2338
29f0b1ce 2339static void houseKeeping(void *)
c836dc19 2340{
cb1523d1 2341 static thread_local time_t last_rootupdate, last_prune, last_secpoll;
3337c2f7
RG
2342 static thread_local int cleanCounter=0;
2343 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
cc59bce6 2344 try {
2345 if(s_running)
2346 return;
2347 s_running=true;
3ddb9247 2348
cc59bce6 2349 struct timeval now;
2350 Utility::gettimeofday(&now, 0);
3ddb9247
PD
2351
2352 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
cc59bce6 2353 DTime dt;
2354 dt.setTimeval(now);
a6f7f5fe 2355 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2356 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
3ddb9247 2357
a6f7f5fe 2358 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
3ddb9247 2359
cc59bce6 2360 if(!((cleanCounter++)%40)) { // this is a full scan!
2361 time_t limit=now.tv_sec-300;
a712cb56 2362 SyncRes::pruneNSSpeeds(limit);
cc59bce6 2363 }
2364 last_prune=time(0);
d67620e4 2365 }
3ddb9247 2366
cc59bce6 2367 if(now.tv_sec - last_rootupdate > 7200) {
30ee601a 2368 int res = SyncRes::getRootNS(g_now, nullptr);
7836f7b4
PL
2369 if (!res)
2370 last_rootupdate=now.tv_sec;
cc59bce6 2371 }
3ddb9247 2372
cb1523d1 2373 if(t_id == s_distributorThreadID) {
3ddb9247 2374
cc59bce6 2375 if(now.tv_sec - last_secpoll >= 3600) {
2376 try {
2377 doSecPoll(&last_secpoll);
2378 }
581d4ea3 2379 catch(std::exception& e)
2380 {
e6a9dde5 2381 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
581d4ea3 2382 }
47e9b74f 2383 catch(PDNSException& e)
2384 {
e6a9dde5 2385 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
47e9b74f 2386 }
d0992a65
CH
2387 catch(ImmediateServFailException &e)
2388 {
e6a9dde5 2389 g_log<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
d0992a65 2390 }
47e9b74f 2391 catch(...)
2392 {
e6a9dde5 2393 g_log<<Logger::Error<<"Exception while performing security poll"<<endl;
47e9b74f 2394 }
2395
18b73338 2396 }
d67620e4 2397 }
cc59bce6 2398 s_running=false;
d67620e4 2399 }
cc59bce6 2400 catch(PDNSException& ae)
2401 {
2402 s_running=false;
e6a9dde5 2403 g_log<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
cc59bce6 2404 throw;
2405 }
779828c4 2406}
d6d5dea7 2407
d187038c 2408static void makeThreadPipes()
49a699c4 2409{
c3828c03 2410 for(unsigned int n=0; n < g_numThreads; ++n) {
49a699c4
BH
2411 struct ThreadPipeSet tps;
2412 int fd[2];
2413 if(pipe(fd) < 0)
2414 unixDie("Creating pipe for inter-thread communications");
3ddb9247 2415
49a699c4
BH
2416 tps.readToThread = fd[0];
2417 tps.writeToThread = fd[1];
3ddb9247 2418
49a699c4
BH
2419 if(pipe(fd) < 0)
2420 unixDie("Creating pipe for inter-thread communications");
2421 tps.readFromThread = fd[0];
2422 tps.writeFromThread = fd[1];
3ddb9247 2423
cf8cda18
RG
2424 if(pipe(fd) < 0)
2425 unixDie("Creating pipe for inter-thread communications");
2426 tps.readQueriesToThread = fd[0];
2427 tps.writeQueriesToThread = fd[1];
d10307c5
RG
2428
2429 if (!setNonBlocking(tps.writeQueriesToThread)) {
2430 unixDie("Making pipe for inter-thread communications non-blocking");
2431 }
cf8cda18 2432
49a699c4
BH
2433 g_pipes.push_back(tps);
2434 }
2435}
2436
00c9b8c1
BH
2437struct ThreadMSG
2438{
2439 pipefunc_t func;
2440 bool wantAnswer;
2441};
2442
b4e76a18 2443void broadcastFunction(const pipefunc_t& func)
49a699c4 2444{
387b9ca6
RG
2445 /* This function might be called by the worker with t_id 0 during startup
2446 for the initialization of ACLs and domain maps */
faf580f5 2447 if (t_id != s_handlerThreadID && t_id != s_distributorThreadID) {
d77abca1
RG
2448 g_log<<Logger::Error<<"broadcastFunction() has been called by a worker ("<<t_id<<")"<<endl;
2449 exit(1);
2450 }
2451
387b9ca6
RG
2452 if (t_id == s_handlerThreadID) {
2453 /* the distributor will call itself below, but if we are the handler thread,
2454 call the function ourselves to update the ACL or domain maps for example */
2455 func();
2456 }
b4e76a18 2457
d77abca1 2458 int n = 0;
1dc8f4d0 2459 for(ThreadPipeSet& tps : g_pipes)
49a699c4
BH
2460 {
2461 if(n++ == t_id) {
b4e76a18 2462 func(); // don't write to ourselves!
49a699c4
BH
2463 continue;
2464 }
3ddb9247 2465
00c9b8c1
BH
2466 ThreadMSG* tmsg = new ThreadMSG();
2467 tmsg->func = func;
2468 tmsg->wantAnswer = true;
b841314c
RG
2469 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2470 delete tmsg;
49a699c4 2471 unixDie("write to thread pipe returned wrong size or error");
b841314c 2472 }
3ddb9247 2473
49467864 2474 string* resp = nullptr;
49a699c4
BH
2475 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2476 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2477
49a699c4 2478 if(resp) {
49a699c4 2479 delete resp;
49467864 2480 resp = nullptr;
49a699c4
BH
2481 }
2482 }
2483}
06ea9015 2484
387b9ca6 2485// This function is only called by the distributor thread, when pdns-distributes-queries is set
8171ab83 2486void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
00c9b8c1 2487{
faf580f5 2488 if (t_id != s_distributorThreadID) {
d77abca1
RG
2489 g_log<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2490 exit(1);
2491 }
2492
8171ab83 2493 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
06ea9015 2494 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2495
de0b293b 2496 if(target == static_cast<unsigned int>(s_distributorThreadID)) {
d77abca1
RG
2497 g_log<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to the distributor"<<endl;
2498 exit(1);
00c9b8c1 2499 }
d77abca1 2500
3ddb9247 2501 ThreadPipeSet& tps = g_pipes[target];
00c9b8c1
BH
2502 ThreadMSG* tmsg = new ThreadMSG();
2503 tmsg->func = func;
2504 tmsg->wantAnswer = false;
3ddb9247 2505
cf8cda18
RG
2506 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2507 if (written > 0) {
2508 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2509 delete tmsg;
2510 unixDie("write to thread pipe returned wrong size or error");
2511 }
2512 }
2513 else {
2514 int error = errno;
b841314c 2515 delete tmsg;
cf8cda18
RG
2516 if (error == EAGAIN || error == EWOULDBLOCK) {
2517 g_stats.queryPipeFullDrops++;
2518 } else {
17634427 2519 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
cf8cda18 2520 }
b841314c 2521 }
00c9b8c1 2522}
3427fa8a 2523
d187038c 2524static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
49a699c4 2525{
f26bf547 2526 ThreadMSG* tmsg = nullptr;
3ddb9247 2527
cf8cda18 2528 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
49a699c4
BH
2529 unixDie("read from thread pipe returned wrong size or error");
2530 }
3ddb9247 2531
2f22827a 2532 void *resp=0;
2533 try {
2534 resp = tmsg->func();
2535 }
2536 catch(std::exception& e) {
6d2010a8 2537 if(g_logCommonErrors)
e6a9dde5 2538 g_log<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2539 }
2540 catch(PDNSException& e) {
6d2010a8 2541 if(g_logCommonErrors)
e6a9dde5 2542 g_log<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2f22827a 2543 }
d7c676a5
RG
2544 if(tmsg->wantAnswer) {
2545 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2546 delete tmsg;
00c9b8c1 2547 unixDie("write to thread pipe returned wrong size or error");
d7c676a5
RG
2548 }
2549 }
3ddb9247 2550
00c9b8c1 2551 delete tmsg;
49a699c4 2552}
09e6702a 2553
13034931
BH
2554template<class T> void *voider(const boost::function<T*()>& func)
2555{
2556 return func();
2557}
2558
b3b5459d
BH
2559vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2560{
2561 a.insert(a.end(), b.begin(), b.end());
2562 return a;
2563}
2564
// Appends every (string, uint16_t) pair of b to the end of a and returns a.
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >& a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2570
3ddb9247
PD
2571vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2572{
2573 a.insert(a.end(), b.begin(), b.end());
2574 return a;
2575}
2576
92011b8f 2577
387b9ca6
RG
2578/*
2579 This function should only be called by the handler to gather metrics, wipe the cache,
2580 reload the Lua script (not the Lua config) or change the current trace regex */
b4e76a18 2581template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
3427fa8a 2582{
faf580f5 2583 if (t_id != s_handlerThreadID) {
d77abca1
RG
2584 g_log<<Logger::Error<<"broadcastFunction has been called by a worker ("<<t_id<<")"<<endl;
2585 exit(1);
d77abca1
RG
2586 }
2587
3427fa8a 2588 T ret=T();
1dc8f4d0 2589 for(ThreadPipeSet& tps : g_pipes)
3427fa8a 2590 {
00c9b8c1
BH
2591 ThreadMSG* tmsg = new ThreadMSG();
2592 tmsg->func = boost::bind(voider<T>, func);
2593 tmsg->wantAnswer = true;
3ddb9247 2594
b841314c
RG
2595 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2596 delete tmsg;
3427fa8a 2597 unixDie("write to thread pipe returned wrong size or error");
b841314c 2598 }
3ddb9247 2599
49467864 2600 T* resp = nullptr;
3427fa8a
BH
2601 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2602 unixDie("read from thread pipe returned wrong size or error");
3ddb9247 2603
3427fa8a 2604 if(resp) {
3427fa8a
BH
2605 ret += *resp;
2606 delete resp;
49467864 2607 resp = nullptr;
3427fa8a
BH
2608 }
2609 }
2610 return ret;
2611}
2612
b4e76a18
RG
2613template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
2614template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
2615template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
2616template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
3427fa8a 2617
d187038c 2618static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
09e6702a
BH
2619{
2620 string remote;
2621 string msg=s_rcc.recv(&remote);
2622 RecursorControlParser rcp;
2623 RecursorControlParser::func_t* command;
3ddb9247 2624
09e6702a 2625 string answer=rcp.getAnswer(msg, &command);
f0f3f0b0
PL
2626
2627 // If we are inside a chroot, we need to strip
2628 if (!arg()["chroot"].empty()) {
a683e8bd 2629 size_t len = arg()["chroot"].length();
f0f3f0b0
PL
2630 remote = remote.substr(len);
2631 }
2632
ab5c053d
BH
2633 try {
2634 s_rcc.send(answer, &remote);
2635 command();
2636 }
fdbf35ac 2637 catch(std::exception& e) {
e6a9dde5 2638 g_log<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
ab5c053d 2639 }
3f81d239 2640 catch(PDNSException& ae) {
e6a9dde5 2641 g_log<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
ab5c053d 2642 }
09e6702a
BH
2643}
2644
d187038c 2645static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2646{
0b18b22e 2647 PacketID* pident=any_cast<PacketID>(&var);
667f7e60 2648 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
09e6702a 2649
667f7e60 2650 shared_array<char> buffer(new char[pident->inNeeded]);
09e6702a 2651
a683e8bd 2652 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
09e6702a 2653 if(ret > 0) {
667f7e60 2654 pident->inMSG.append(&buffer[0], &buffer[ret]);
a683e8bd 2655 pident->inNeeded-=(size_t)ret;
825fa717 2656 if(!pident->inNeeded || pident->inIncompleteOkay) {
667f7e60
BH
2657 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2658 PacketID pid=*pident;
2659 string msg=pident->inMSG;
3ddb9247 2660
bb4bdbaf 2661 t_fdm->removeReadFD(fd);
3ddb9247 2662 MT->sendEvent(pid, &msg);
09e6702a
BH
2663 }
2664 else {
667f7e60 2665 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
09e6702a
BH
2666 }
2667 }
2668 else {
667f7e60 2669 PacketID tmp=*pident;
bb4bdbaf 2670 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
09e6702a
BH
2671 string empty;
2672 MT->sendEvent(tmp, &empty); // this conveys error status
2673 }
2674}
2675
d187038c 2676static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2677{
0b18b22e 2678 PacketID* pid=any_cast<PacketID>(&var);
a683e8bd 2679 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
09e6702a 2680 if(ret > 0) {
a683e8bd 2681 pid->outPos+=(ssize_t)ret;
667f7e60
BH
2682 if(pid->outPos==pid->outMSG.size()) {
2683 PacketID tmp=*pid;
bb4bdbaf 2684 t_fdm->removeWriteFD(fd);
09e6702a
BH
2685 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2686 }
2687 }
2688 else { // error or EOF
667f7e60 2689 PacketID tmp(*pid);
bb4bdbaf 2690 t_fdm->removeWriteFD(fd);
09e6702a 2691 string sent;
998a4334 2692 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
2693 }
2694}
2695
34801ab1 2696// resend event to everybody chained onto it
d187038c 2697static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
34801ab1
BH
2698{
2699 if(iter->key.chain.empty())
2700 return;
e27e91a8 2701 // cerr<<"doResends called!\n";
34801ab1
BH
2702 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2703 resend.fd=-1;
2704 resend.id=*i;
e27e91a8 2705 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
4665c31e 2706
34801ab1
BH
2707 MT->sendEvent(resend, &content);
2708 g_stats.chainResends++;
34801ab1
BH
2709 }
2710}
2711
d187038c 2712static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
09e6702a 2713{
600fc20b 2714 PacketID pid=any_cast<PacketID>(var);
a683e8bd 2715 ssize_t len;
fae8fe07
RG
2716 std::string packet;
2717 packet.resize(g_outgoingEDNSBufsize);
996c89cc 2718 ComboAddress fromaddr;
09e6702a
BH
2719 socklen_t addrlen=sizeof(fromaddr);
2720
fae8fe07 2721 len=recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr *)&fromaddr, &addrlen);
c1da7976 2722
a683e8bd 2723 if(len < (ssize_t) sizeof(dnsheader)) {
998a4334 2724 if(len < 0)
996c89cc 2725 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a 2726 else {
3ddb9247 2727 g_stats.serverParseError++;
09e6702a 2728 if(g_logCommonErrors)
e6a9dde5 2729 g_log<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
e44d9fa7 2730 ": packet smaller than DNS header"<<endl;
998a4334 2731 }
34801ab1 2732
49a699c4 2733 t_udpclientsocks->returnSocket(fd);
34801ab1
BH
2734 string empty;
2735
2736 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
3ddb9247 2737 if(iter != MT->d_waiters.end())
34801ab1 2738 doResends(iter, pid, empty);
3ddb9247 2739
34801ab1 2740 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
998a4334 2741 return;
3ddb9247 2742 }
998a4334 2743
fae8fe07 2744 packet.resize(len);
998a4334 2745 dnsheader dh;
fae8fe07 2746 memcpy(&dh, &packet.at(0), sizeof(dh));
3ddb9247 2747
6da3b3ad
PD
2748 PacketID pident;
2749 pident.remote=fromaddr;
2750 pident.id=dh.id;
2751 pident.fd=fd;
34801ab1 2752
33a928af 2753 if(!dh.qr && g_logCommonErrors) {
e6a9dde5 2754 g_log<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
6da3b3ad
PD
2755 }
2756
2757 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2758 !dh.qr) { // one weird server
2759 pident.domain.clear();
2760 pident.type = 0;
2761 }
2762 else {
2763 try {
0b31e67e 2764 if(len > 12)
fae8fe07 2765 pident.domain=DNSName(&packet.at(0), len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
6da3b3ad
PD
2766 }
2767 catch(std::exception& e) {
2768 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
e6a9dde5 2769 g_log<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
6da3b3ad 2770 return;
34801ab1 2771 }
6da3b3ad 2772 }
34801ab1 2773
6da3b3ad
PD
2774 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2775 if(iter != MT->d_waiters.end()) {
2776 doResends(iter, pident, packet);
2777 }
c1da7976 2778
6da3b3ad 2779retryWithName:
4957a608 2780
6da3b3ad
PD
2781 if(!MT->sendEvent(pident, &packet)) {
2782 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2783 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2784 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
e325f20c 2785 pident.domain == mthread->key.domain) {
6da3b3ad 2786 mthread->key.nearMisses++;
998a4334 2787 }
6da3b3ad
PD
2788
2789 // be a bit paranoid here since we're weakening our matching
3ddb9247 2790 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
6da3b3ad
PD
2791 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2792 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2793 pident.domain = mthread->key.domain;
2794 pident.type = mthread->key.type;
2795 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
d4fb76e9 2796 }
09e6702a 2797 }
6da3b3ad
PD
2798 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2799 if(g_logCommonErrors) {
e6a9dde5 2800 g_log<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
d8f6d49f 2801 }
09e6702a 2802 }
6da3b3ad
PD
2803 else if(fd >= 0) {
2804 t_udpclientsocks->returnSocket(fd);
2805 }
09e6702a
BH
2806}
2807
1f4abb20
BH
2808FDMultiplexer* getMultiplexer()
2809{
2810 FDMultiplexer* ret;
f26bf547 2811 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
1f4abb20 2812 try {
f26bf547 2813 ret=i.second();
1f4abb20
BH
2814 return ret;
2815 }
98d0ee4a 2816 catch(FDMultiplexerException &fe) {
e6a9dde5 2817 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
98d0ee4a
BH
2818 }
2819 catch(...) {
e6a9dde5 2820 g_log<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
98d0ee4a 2821 }
1f4abb20 2822 }
e6a9dde5 2823 g_log<<Logger::Error<<"No working multiplexer found!"<<endl;
1f4abb20
BH
2824 exit(1);
2825}
2826
3ddb9247 2827
d187038c 2828static string* doReloadLuaScript()
4485aa35 2829{
674cf0f6 2830 string fname= ::arg()["lua-dns-script"];
4485aa35 2831 try {
674cf0f6 2832 if(fname.empty()) {
f26bf547 2833 t_pdl.reset();
e6a9dde5 2834 g_log<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
0f39c1a3 2835 return new string("unloaded\n");
4485aa35
BH
2836 }
2837 else {
9694e14f
AT
2838 t_pdl = std::make_shared<RecursorLua4>();
2839 t_pdl->loadFile(fname);
4485aa35
BH
2840 }
2841 }
fdbf35ac 2842 catch(std::exception& e) {
e6a9dde5 2843 g_log<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
0f39c1a3 2844 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
4485aa35 2845 }
3ddb9247 2846
e6a9dde5 2847 g_log<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
0f39c1a3 2848 return new string("(re)loaded '"+fname+"'\n");
4485aa35
BH
2849}
2850
49a699c4
BH
2851string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2852{
3ddb9247 2853 if(begin != end)
49a699c4 2854 ::arg().set("lua-dns-script") = *begin;
3ddb9247 2855
0f39c1a3 2856 return broadcastAccFunction<string>(doReloadLuaScript);
3ddb9247 2857}
49a699c4 2858
d187038c 2859static string* pleaseUseNewTraceRegex(const std::string& newRegex)
77499b05
BH
2860try
2861{
2862 if(newRegex.empty()) {
f26bf547 2863 t_traceRegex.reset();
77499b05
BH
2864 return new string("unset\n");
2865 }
2866 else {
f26bf547 2867 t_traceRegex = std::make_shared<Regex>(newRegex);
77499b05
BH
2868 return new string("ok\n");
2869 }
2870}
3f81d239 2871catch(PDNSException& ae)
77499b05
BH
2872{
2873 return new string(ae.reason+"\n");
2874}
2875
2876string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2877{
2878 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2879}
2880
4e9a20e6 2881static void checkLinuxIPv6Limits()
2882{
2883#ifdef __linux__
2884 string line;
2885 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
335da0ba 2886 int lim=std::stoi(line);
4e9a20e6 2887 if(lim < 16384) {
e6a9dde5 2888 g_log<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
4e9a20e6 2889 }
2890 }
2891#endif
2892}
36849ff2 2893static void checkOrFixFDS()
4e9a20e6 2894{
c0063e60 2895 unsigned int availFDs=getFilenumLimit();
2896 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2897
2898 if(wantFDs > availFDs) {
067ad20e 2899 unsigned int hardlimit= getFilenumLimit(true);
2900 if(hardlimit >= wantFDs) {
c0063e60 2901 setFilenumLimit(wantFDs);
e6a9dde5 2902 g_log<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
36849ff2 2903 }
2904 else {
067ad20e 2905 int newval = (hardlimit - 25) / g_numWorkerThreads;
e6a9dde5 2906 g_log<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
36849ff2 2907 g_maxMThreads = newval;
067ad20e 2908 setFilenumLimit(hardlimit);
36849ff2 2909 }
2910 }
4e9a20e6 2911}
77499b05 2912
// Forward declaration; being static, the function is defined elsewhere in this file.
static void* recursorThread(int tid, bool worker);
51e2144e 2914
f26bf547 2915static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
49a699c4
BH
2916{
2917 t_allowFrom = ng;
f26bf547 2918 return nullptr;
49a699c4
BH
2919}
2920
dbd23fc2
BH
// Argument count and vector that parseACLs() passes to ::arg().preParse()
// when it re-reads the configuration.
int g_argc;
char** g_argv;
2923
18af64a8 2924void parseACLs()
f7c1d4e3 2925{
18af64a8 2926 static bool l_initialized;
3ddb9247 2927
49a699c4 2928 if(l_initialized) { // only reload configuration file on second call
18af64a8 2929 string configname=::arg()["config-dir"]+"/recursor.conf";
3e63da83
JR
2930 if(::arg()["config-name"]!="") {
2931 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2932 }
18af64a8 2933 cleanSlashes(configname);
3ddb9247
PD
2934
2935 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
7e818521 2936 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
49a699c4 2937 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
242b90e1 2938 ::arg().preParseFile(configname.c_str(), "include-dir");
829849d6
AT
2939 ::arg().preParse(g_argc, g_argv, "include-dir");
2940
2941 // then process includes
2942 std::vector<std::string> extraConfigs;
242b90e1
AT
2943 ::arg().gatherIncludes(extraConfigs);
2944
1dc8f4d0 2945 for(const std::string& fn : extraConfigs) {
7e818521 2946 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2947 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2948 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2949 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
829849d6 2950 }
ca2c884c
AT
2951
2952 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2953 ::arg().preParse(g_argc, g_argv, "allow-from");
f27e6356 2954 }
49a699c4 2955
f26bf547
RG
2956 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2957 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
3ddb9247 2958
2c95fc65
BH
2959 if(!::arg()["allow-from-file"].empty()) {
2960 string line;
2c95fc65
BH
2961 ifstream ifs(::arg()["allow-from-file"].c_str());
2962 if(!ifs) {
9c61b9d0 2963 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2c95fc65
BH
2964 }
2965
2966 string::size_type pos;
2967 while(getline(ifs,line)) {
2968 pos=line.find('#');
2969 if(pos!=string::npos)
2970 line.resize(pos);
2971 trim(line);
2972 if(line.empty())
2973 continue;
2974
18af64a8 2975 allowFrom->addMask(line);
2c95fc65 2976 }
e6a9dde5 2977 g_log<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2c95fc65
BH
2978 }
2979 else if(!::arg()["allow-from"].empty()) {
f7c1d4e3
BH
2980 vector<string> ips;
2981 stringtok(ips, ::arg()["allow-from"], ", ");
3ddb9247 2982
e6a9dde5 2983 g_log<<Logger::Warning<<"Only allowing queries from: ";
f7c1d4e3 2984 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
18af64a8 2985 allowFrom->addMask(*i);
f7c1d4e3 2986 if(i!=ips.begin())
e6a9dde5
PL
2987 g_log<<Logger::Warning<<", ";
2988 g_log<<Logger::Warning<<*i;
f7c1d4e3 2989 }
e6a9dde5 2990 g_log<<Logger::Warning<<endl;
f7c1d4e3 2991 }
49a699c4 2992 else {
3ddb9247 2993 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
e6a9dde5 2994 g_log<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
f26bf547 2995 allowFrom = nullptr;
49a699c4 2996 }
3ddb9247 2997
49a699c4 2998 g_initialAllowFrom = allowFrom;
d7dae798 2999 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
f26bf547 3000 oldAllowFrom = nullptr;
3ddb9247 3001
49a699c4 3002 l_initialized = true;
18af64a8
BH
3003}
3004
795215f2 3005
756e82cf 3006static void setupDelegationOnly()
3007{
3008 vector<string> parts;
3009 stringtok(parts, ::arg()["delegation-only"], ", \t");
3010 for(const auto& p : parts) {
9065eb05 3011 SyncRes::addDelegationOnly(DNSName(p));
756e82cf 3012 }
3013}
795215f2 3014
8fd25133
RG
3015static std::map<unsigned int, std::set<int> > parseCPUMap()
3016{
3017 std::map<unsigned int, std::set<int> > result;
3018
3019 const std::string value = ::arg()["cpu-map"];
3020
3021 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
e6a9dde5 3022 g_log<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
8fd25133
RG
3023 return result;
3024 }
3025
3026 std::vector<std::string> parts;
3027
3028 stringtok(parts, value, " \t");
3029
3030 for(const auto& part : parts) {
3031 if (part.find('=') == string::npos)
3032 continue;
3033
3034 try {
3035 auto headers = splitField(part, '=');
3036 trim(headers.first);
3037 trim(headers.second);
3038
3039 unsigned int threadId = pdns_stou(headers.first);
3040 std::vector<std::string> cpus;
3041
3042 stringtok(cpus, headers.second, ",");
3043
3044 for(const auto& cpu : cpus) {
3045 int cpuId = std::stoi(cpu);
3046
3047 result[threadId].insert(cpuId);
3048 }
3049 }
3050 catch(const std::exception& e) {
e6a9dde5 3051 g_log<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
8fd25133
RG
3052 }
3053 }
3054
3055 return result;
3056}
3057
3058static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
3059{
3060 const auto& cpuMapping = cpusMap.find(n);
3061 if (cpuMapping != cpusMap.cend()) {
3062 int rc = mapThreadToCPUList(tid, cpuMapping->second);
3063 if (rc == 0) {
e6a9dde5 3064 g_log<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
8fd25133 3065 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3066 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3067 }
e6a9dde5 3068 g_log<<Logger::Info<<endl;
8fd25133
RG
3069 }
3070 else {
e6a9dde5 3071 g_log<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
8fd25133 3072 for (const auto cpu : cpuMapping->second) {
e6a9dde5 3073 g_log<<Logger::Info<<" "<<cpu;
8fd25133 3074 }
e6a9dde5 3075 g_log<<Logger::Info<<strerror(rc)<<endl;
8fd25133
RG
3076 }
3077 }
3078}
3079
d187038c 3080static int serviceMain(int argc, char*argv[])
18af64a8 3081{
e6a9dde5
PL
3082 g_log.setName(s_programname);
3083 g_log.disableSyslog(::arg().mustDo("disable-syslog"));
3084 g_log.setTimestamps(::arg().mustDo("log-timestamp"));
18af64a8
BH
3085
3086 if(!::arg()["logging-facility"].empty()) {
f8499e52
BH
3087 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
3088 if(val >= 0)
e6a9dde5 3089 g_log.setFacility(val);
18af64a8 3090 else
e6a9dde5 3091 g_log<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
18af64a8
BH
3092 }
3093
ba1a571d 3094 showProductVersion();
3afde9b2 3095
06ea9015 3096 g_disthashseed=dns_random(0xffffffff);
3097
b7ef5828
PL
3098 checkLinuxIPv6Limits();
3099 try {
3100 vector<string> addrs;
3101 if(!::arg()["query-local-address6"].empty()) {
3102 SyncRes::s_doIPv6=true;
e6a9dde5 3103 g_log<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
b7ef5828
PL
3104
3105 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
3106 for(const string& addr : addrs) {
3107 g_localQueryAddresses6.push_back(ComboAddress(addr));
3108 }
3109 }
3110 else {
e6a9dde5 3111 g_log<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
b7ef5828
PL
3112 }
3113 addrs.clear();
3114 stringtok(addrs, ::arg()["query-local-address"], ", ;");
3115 for(const string& addr : addrs) {
3116 g_localQueryAddresses4.push_back(ComboAddress(addr));
3117 }
3118 }
3119 catch(std::exception& e) {
e6a9dde5 3120 g_log<<Logger::Error<<"Assigning local query addresses: "<<e.what();
b7ef5828
PL
3121 exit(99);
3122 }
3123
e48c6b8a
PL
3124 // keep this ABOVE loadRecursorLuaConfig!
3125 if(::arg()["dnssec"]=="off")
3126 g_dnssecmode=DNSSECMode::Off;
3127 else if(::arg()["dnssec"]=="process-no-validate")
3128 g_dnssecmode=DNSSECMode::ProcessNoValidate;
3129 else if(::arg()["dnssec"]=="process")
3130 g_dnssecmode=DNSSECMode::Process;
3131 else if(::arg()["dnssec"]=="validate")
3132 g_dnssecmode=DNSSECMode::ValidateAll;
3133 else if(::arg()["dnssec"]=="log-fail")
3134 g_dnssecmode=DNSSECMode::ValidateForLog;
3135 else {
e6a9dde5 3136 g_log<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
e48c6b8a
PL
3137 exit(1);
3138 }
3139
3140 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
d377bb54 3141 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
e48c6b8a 3142
a6f7f5fe 3143 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
3144 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
3145
0f5785a6
PL
3146 try {
3147 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
3148 }
3149 catch (PDNSException &e) {
e6a9dde5 3150 g_log<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
0f5785a6
PL
3151 exit(1);
3152 }
ad42489c 3153
18af64a8 3154 parseACLs();
92011b8f 3155 sortPublicSuffixList();
3156
eb5bae86 3157 if(!::arg()["dont-query"].empty()) {
eb5bae86
BH
3158 vector<string> ips;
3159 stringtok(ips, ::arg()["dont-query"], ", ");
66e0b6ea
BH
3160 ips.push_back("0.0.0.0");
3161 ips.push_back("::");
c36bc97a 3162
e6a9dde5 3163 g_log<<Logger::Warning<<"Will not send queries to: ";
eb5bae86 3164 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
9065eb05 3165 SyncRes::addDontQuery(*i);
eb5bae86 3166 if(i!=ips.begin())
e6a9dde5
PL
3167 g_log<<Logger::Warning<<", ";
3168 g_log<<Logger::Warning<<*i;
eb5bae86 3169 }
e6a9dde5 3170 g_log<<Logger::Warning<<endl;
eb5bae86
BH
3171 }
3172
f7c1d4e3 3173 g_quiet=::arg().mustDo("quiet");
3ddb9247 3174
1bc3c142
BH
3175 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
3176 if(g_weDistributeQueries) {
e6a9dde5 3177 g_log<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
1bc3c142 3178 }
3ddb9247 3179
756e82cf 3180 setupDelegationOnly();
b33c2462 3181 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
756e82cf 3182
77499b05
BH
3183 if(::arg()["trace"]=="fail") {
3184 SyncRes::setDefaultLogMode(SyncRes::Store);
3185 }
3186 else if(::arg().mustDo("trace")) {
3187 SyncRes::setDefaultLogMode(SyncRes::Log);
f7c1d4e3
BH
3188 ::arg().set("quiet")="no";
3189 g_quiet=false;
3e9c6c0a 3190 g_dnssecLOG=true;
f7c1d4e3 3191 }
3ddb9247 3192
aadceba8 3193 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3194
1051f8a9
BH
3195 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3196
f7c1d4e3 3197 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
63637fd8 3198 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
1051f8a9 3199 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
79ec0627
PL
3200 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3201 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3202 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
628e2c7b
PA
3203 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3204 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
f7c1d4e3 3205 SyncRes::s_serverID=::arg()["server-id"];
173d790e 3206 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
9de3e034 3207 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
7c3398aa 3208 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
01402d56 3209 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
f7c1d4e3
BH
3210 if(SyncRes::s_serverID.empty()) {
3211 char tmp[128];
3212 gethostname(tmp, sizeof(tmp)-1);
3213 SyncRes::s_serverID=tmp;
3214 }
3ddb9247 3215
e9f9b8ec
RG
3216 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3217 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3218
8a3a3822
RG
3219 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3220 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3221 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3222 }
3223 else {
3224 bool found = false;
3225 for (const auto& addr : g_localQueryAddresses4) {
3226 if (!IsAnyAddress(addr)) {
3227 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3228 found = true;
3229 break;
3230 }
3231 }
3232 if (!found) {
3233 for (const auto& addr : g_localQueryAddresses6) {
3234 if (!IsAnyAddress(addr)) {
3235 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3236 found = true;
3237 break;
3238 }
3239 }
3240 if (!found) {
3241 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3242 }
3243 }
3244 }
3245
2fe3354d
CH
3246 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3247 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
3248 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3249
5cc8371b 3250 g_XPFAcl.toMasks(::arg()["xpf-allow-from"]);
59cb4a79 3251 g_xpfRRCode = ::arg().asNum("xpf-rr-code");
5cc8371b 3252
5b0ddd18 3253 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
bb4bdbaf 3254
49a699c4 3255 g_initialDomainMap = parseAuthAndForwards();
3ddb9247 3256
08f3f638 3257 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3ddb9247 3258
f7c1d4e3 3259 g_logCommonErrors=::arg().mustDo("log-common-errors");
98d36505 3260 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
e661a20b
PD
3261
3262 g_anyToTcp = ::arg().mustDo("any-to-tcp");
a09a8ce0
PD
3263 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3264
b3adda56
PD
3265 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3266
810ff705 3267 g_numWorkerThreads = ::arg().asNum("threads");
1c4f2e1b 3268 if (g_numWorkerThreads < 1) {
e6a9dde5 3269 g_log<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
1c4f2e1b
RG
3270 g_numWorkerThreads = 1;
3271 }
3272
810ff705
RG
3273 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
3274 g_maxMThreads = ::arg().asNum("max-mthreads");
3275
00b8cadc
RG
3276 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3277
0ec489bf 3278 g_statisticsInterval = ::arg().asNum("statistics-interval");
3279
810ff705
RG
3280#ifdef SO_REUSEPORT
3281 g_reusePort = ::arg().mustDo("reuseport");
3282#endif
3283
3284 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
3285
3286 if (g_useOneSocketPerThread) {
3287 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
3288 makeUDPServerSockets(threadId);
3289 makeTCPServerSockets(threadId);
3290 }
3291 }
3292 else {
3293 makeUDPServerSockets(0);
3294 makeTCPServerSockets(0);
3295 }
815099b2 3296
677e2a46
BH
3297 int forks;
3298 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
1bc3c142
BH
3299 if(!fork()) // we are child
3300 break;
3301 }
3ddb9247 3302
f7c1d4e3 3303 if(::arg().mustDo("daemon")) {
e6a9dde5
PL
3304 g_log<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3305 g_log.toConsole(Logger::Critical);
f7c1d4e3 3306 daemonize();
a4241908 3307 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
f7c1d4e3
BH
3308 }
3309 signal(SIGUSR1,usr1Handler);
3310 signal(SIGUSR2,usr2Handler);
3311 signal(SIGPIPE,SIG_IGN);
810ff705 3312
a6414fdc 3313 checkOrFixFDS();
3ddb9247 3314
d1b28475
KM
3315#ifdef HAVE_LIBSODIUM
3316 if (sodium_init() == -1) {
e6a9dde5 3317 g_log<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
d1b28475
KM
3318 exit(99);
3319 }
3320#endif
3321
3afde9b2
PL
3322 openssl_thread_setup();
3323 openssl_seed();
e97cb679
AT
3324 /* setup rng before chroot */
3325 dns_random_init();
3afde9b2 3326
138435cb
BH
3327 int newgid=0;
3328 if(!::arg()["setgid"].empty())
3329 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3330 int newuid=0;
3331 if(!::arg()["setuid"].empty())
3332 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3333
f1d6a7ce
KM
3334 Utility::dropGroupPrivs(newuid, newgid);
3335
138435cb 3336 if (!::arg()["chroot"].empty()) {
75336810
PL
3337#ifdef HAVE_SYSTEMD
3338 char *ns;
3339 ns = getenv("NOTIFY_SOCKET");
3340 if (ns != nullptr) {
e6a9dde5 3341 g_log<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
75336810
PL
3342 exit(1);
3343 }
3344#endif
138435cb 3345 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
e6a9dde5 3346 g_log<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
138435cb
BH
3347 exit(1);
3348 }
f0f3f0b0 3349 else
e6a9dde5 3350 g_log<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
138435cb
BH
3351 }
3352
f0f3f0b0
PL
3353 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3354 if(!s_pidfname.empty())
3355 unlink(s_pidfname.c_str()); // remove possible old pid file
3356 writePid();
3357
3358 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3359
f1d6a7ce 3360 Utility::dropUserPrivs(newuid);
c0063e60 3361
49a699c4 3362 makeThreadPipes();
3ddb9247 3363
5d4dd7fe
BH
3364 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3365 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
fde296a3 3366 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
a5886e6a 3367 s_maxUDPQueriesPerRound=::arg().asNum("max-udp-queries-per-round");
343257a4 3368
d705aad9
RG
3369 if (::arg().mustDo("snmp-agent")) {
3370 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3371 g_snmpAgent->run();
3372 }
3373
b47026fd 3374 int port = ::arg().asNum("udp-source-port-min");
58da9034 3375 if(port < 1024 || port > 65535){
e6a9dde5 3376 g_log<<Logger::Error<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl;
bf6f28ca
CHB
3377 exit(99); // this isn't going to fix itself either
3378 }
3379 s_minUdpSourcePort = port;
b47026fd 3380 port = ::arg().asNum("udp-source-port-max");
58da9034 3381 if(port < 1024 || port > 65535 || port < s_minUdpSourcePort){
e6a9dde5 3382 g_log<<Logger::Error<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl;
bf6f28ca
CHB
3383 exit(99); // this isn't going to fix itself either
3384 }
3385 s_maxUdpSourcePort = port;
3386 std::vector<string> parts {};
b47026fd 3387 stringtok(parts, ::arg()["udp-source-port-avoid"], ", ");
bf6f28ca
CHB
3388 for (const auto &part : parts)
3389 {
3390 port = std::stoi(part);
58da9034 3391 if(port < 1024 || port > 65535){
e6a9dde5 3392 g_log<<Logger::Error<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part<<endl;
bf6f28ca
CHB
3393 exit(99); // this isn't going to fix itself either
3394 }
3395 s_avoidUdpSourcePorts.insert(port);
3396 }
3397
d77abca1 3398 /* This thread handles the web server, carbon, statistics and the control channel */
faf580f5 3399 std::thread handlerThread(recursorThread, s_handlerThreadID, false);
d77abca1 3400
8fd25133 3401 const auto cpusMap = parseCPUMap();
d77abca1
RG
3402
3403 std::vector<std::thread> workers(g_numThreads);
c3828c03 3404 if(g_numThreads == 1) {
e6a9dde5 3405 g_log<<Logger::Warning<<"Operating unthreaded"<<endl;
6b6720de
PL
3406#ifdef HAVE_SYSTEMD
3407 sd_notify(0, "READY=1");
3408#endif
8fd25133 3409 setCPUMap(cpusMap, 0, pthread_self());
d77abca1 3410 recursorThread(0, true);
76698c6e
BH
3411 }
3412 else {
e6a9dde5 3413 g_log<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
c3828c03 3414 for(unsigned int n=0; n < g_numThreads; ++n) {
d77abca1 3415 workers[n] = std::thread(recursorThread, n, true);
8fd25133 3416
d77abca1 3417 setCPUMap(cpusMap, n, workers[n].native_handle());
76698c6e 3418 }
6b6720de
PL
3419#ifdef HAVE_SYSTEMD
3420 sd_notify(0, "READY=1");
3421#endif
d77abca1 3422 workers.back().join();
bb4bdbaf 3423 }
bb4bdbaf
BH
3424 return 0;
3425}
3426
d77abca1 3427static void* recursorThread(int n, bool worker)
bb4bdbaf
BH
3428try
3429{
d77abca1 3430 t_id=n;
49a699c4 3431 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
a712cb56 3432 SyncRes::setDomainMap(g_initialDomainMap);
49a699c4 3433 t_allowFrom = g_initialAllowFrom;
f26bf547
RG
3434 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3435 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
49a699c4 3436 primeHints();
3ddb9247 3437
f26bf547 3438 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3ddb9247 3439
aa7929a3 3440#ifdef HAVE_PROTOBUF
f26bf547 3441 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
aa7929a3 3442#endif
e6a9dde5 3443 g_log<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3ddb9247 3444
674cf0f6
BH
3445 try {
3446 if(!::arg()["lua-dns-script"].empty()) {
9694e14f
AT
3447 t_pdl = std::make_shared<RecursorLua4>();
3448 t_pdl->loadFile(::arg()["lua-dns-script"]);
e6a9dde5 3449 g_log<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
674cf0f6 3450 }
674cf0f6
BH
3451 }
3452 catch(std::exception &e) {
e6a9dde5 3453 g_log<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
62f0ae62 3454 _exit(99);
674cf0f6 3455 }
3ddb9247 3456
f8f243b0 3457 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
92011b8f 3458 if(ringsize) {
f26bf547 3459 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
f8f243b0 3460 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3ddb9247 3461 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
f8f243b0 3462 else
3ddb9247 3463 t_remotes->set_capacity(ringsize);
f26bf547 3464 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3465 t_servfailremotes->set_capacity(ringsize);
f26bf547 3466 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3ddb9247 3467 t_largeanswerremotes->set_capacity(ringsize);
92011b8f 3468
f26bf547 3469 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3470 t_queryring->set_capacity(ringsize);
f26bf547 3471 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3ddb9247 3472 t_servfailqueryring->set_capacity(ringsize);
92011b8f 3473 }
3ddb9247 3474
f26bf547 3475 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3ddb9247 3476
63341e8d
RG
3477#ifdef HAVE_PROTOBUF
3478 /* start protobuf export threads if needed */
3479 auto luaconfsLocal = g_luaconfs.getLocal();
3480 checkProtobufExport(luaconfsLocal);
3481 checkOutgoingProtobufExport(luaconfsLocal);
3482#endif /* HAVE_PROTOBUF */
3483
bb4bdbaf
BH
3484 PacketID pident;
3485
3486 t_fdm=getMultiplexer();
d77abca1
RG
3487
3488 if(!worker) {
d07bf7ff 3489 if(::arg().mustDo("webserver")) {
e6a9dde5 3490 g_log<<Logger::Warning << "Enabling web server" << endl;
8989097d 3491 try {
1ce57618 3492 new RecursorWebServer(t_fdm);
8989097d
CH
3493 }
3494 catch(PDNSException &e) {
e6a9dde5 3495 g_log<<Logger::Error<<"Exception: "<<e.reason<<endl;
8989097d
CH
3496 exit(99);
3497 }
f3d1d67b 3498 }
e6a9dde5 3499 g_log<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
f3d1d67b 3500 }
810ff705 3501 else {
d77abca1
RG
3502 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
3503 t_fdm->addReadFD(g_pipes[t_id].readQueriesToThread, handlePipeRequest);
3504
3505 if(g_useOneSocketPerThread) {
3506 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
810ff705
RG
3507 t_fdm->addReadFD(i->first, i->second);
3508 }
3509 }
d77abca1 3510 else {
faf580f5 3511 if(!g_weDistributeQueries || t_id == s_distributorThreadID) { // if we distribute queries, only t_id = 0 listens
d77abca1
RG
3512 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
3513 t_fdm->addReadFD(i->first, i->second);
3514 }
3515 }
3516 }
810ff705 3517 }
3ddb9247 3518
b0b37121 3519 registerAllStats();
d77abca1
RG
3520
3521 if(!worker) {
674cf0f6
BH
3522 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3523 }
1bc3c142 3524
f7c1d4e3 3525 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3ddb9247 3526
f7c1d4e3 3527 bool listenOnTCP(true);
49a699c4 3528
cb1523d1 3529 time_t last_stat = 0;
2c78bd57 3530 time_t last_carbon=0;
3531 time_t carbonInterval=::arg().asNum("carbon-interval");
ac0995bb 3532 counter.store(0); // used to periodically execute certain tasks
f7c1d4e3 3533 for(;;) {
ac0e821b 3534 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3ddb9247 3535
3427fa8a
BH
3536 if(!(counter%500)) {
3537 MT->makeThread(houseKeeping, 0);
f7c1d4e3
BH
3538 }
3539
d2392145 3540 if(!(counter%55)) {
d8f6d49f 3541 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
bb4bdbaf 3542 expired_t expired=t_fdm->getTimeouts(g_now);
3ddb9247 3543
f7c1d4e3 3544 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
cd989c87 3545 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
4957a608 3546 if(g_logCommonErrors)
e6a9dde5 3547 g_log<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toStringWithPort() <<endl;
4957a608 3548 t_fdm->removeReadFD(i->first);
f7c1d4e3
BH
3549 }
3550 }
3ddb9247 3551
f7c1d4e3
BH
3552 counter++;
3553
cb1523d1
RG
3554 if(!worker) {
3555 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
3556 doStats();
3557 last_stat = g_now.tv_sec;
3558 }
f7c1d4e3 3559
cb1523d1 3560 Utility::gettimeofday(&g_now, 0);
2c78bd57 3561
cb1523d1
RG
3562 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
3563 MT->makeThread(doCarbonDump, 0);
3564 last_carbon = g_now.tv_sec;
3565 }
2c78bd57 3566 }
3567
bb4bdbaf 3568 t_fdm->run(&g_now);
3ea54bf0 3569 // 'run' updates g_now for us
f7c1d4e3 3570
faf580f5 3571 if(worker && (!g_weDistributeQueries || t_id == s_distributorThreadID)) { // if pdns distributes queries, only tid 0 should do this
5c889cf5 3572 if(listenOnTCP) {
3573 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3574 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3575 t_fdm->removeReadFD(*i);
3576 listenOnTCP=false;
3577 }
f7c1d4e3 3578 }
5c889cf5 3579 else {
3580 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3581 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3582 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3583 listenOnTCP=true;
3584 }
f7c1d4e3
BH
3585 }
3586 }
3587 }
3588}
3f81d239 3589catch(PDNSException &ae) {
e6a9dde5 3590 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
bb4bdbaf
BH
3591 return 0;
3592}
3593catch(std::exception &e) {
e6a9dde5 3594 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
bb4bdbaf
BH
3595 return 0;
3596}
3597catch(...) {
e6a9dde5 3598 g_log<<Logger::Error<<"any other exception in main: "<<endl;
bb4bdbaf
BH
3599 return 0;
3600}
3601
51e2144e 3602
3ddb9247 3603int main(int argc, char **argv)
288f4aa9 3604{
dbd23fc2
BH
3605 g_argc = argc;
3606 g_argv = argv;
5e3de507 3607 g_stats.startupTime=time(0);
3e135495 3608 versionSetProduct(ProductRecursor);
8a63d3ce 3609 reportBasicTypes();
0007c2e5 3610 reportOtherTypes();
ea634573 3611
22030c37 3612 int ret = EXIT_SUCCESS;
caa6eefa 3613
288f4aa9 3614 try {
f888311c 3615 ::arg().set("stack-size","stack size per mthread")="200000";
2e3d8a19 3616 ::arg().set("soa-minimum-ttl","Don't change")="0";
2e3d8a19 3617 ::arg().set("no-shuffle","Don't change")="off";
2e3d8a19 3618 ::arg().set("local-port","port to listen on")="53";
32252594 3619 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
fec7dd5a 3620 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
77499b05 3621 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
a6415142 3622 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
c87e1876 3623 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
d3f809bf 3624 ::arg().set("daemon","Operate as a daemon")="no";
191f2e47 3625 ::arg().setSwitch("write-pid","Write a PID file")="yes";
9afa8662 3626 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
b6cfa948 3627 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
b18fa400 3628 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
22e0810c 3629 ::arg().set("log-common-errors","If we should log rather common errors")="no";
2e3d8a19
BH
3630 ::arg().set("chroot","switch to chroot jail")="";
3631 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3632 ::arg().set("setuid","If set, change user id to this uid for more security")="";
c83ee49d 3633 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
bb4bdbaf 3634 ::arg().set("threads", "Launch this number of threads")="2";
adabfcb9 3635 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
5124de27 3636 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
d07bf7ff 3637 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
479e0976
CH
3638 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3639 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3640 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3641 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
d07bf7ff
PL
3642 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3643 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3644 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
be3e1477 3645 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
cc08b5a9 3646 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
e12f8407 3647 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
2c78bd57 3648 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
0ec489bf 3649 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
c038218b 3650 ::arg().set("quiet","Suppress logging of questions and answers")="";
f27e6356 3651 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2e3d8a19 3652 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
fdbf35ac
BH
3653 ::arg().set("socket-owner","Owner of socket")="";
3654 ::arg().set("socket-group","Group of socket")="";
3655 ::arg().set("socket-mode", "Permissions for socket")="";
3ddb9247 3656
f0f3f0b0 3657 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
2e3d8a19
BH
3658 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3659 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
d4fb76e9 3660 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
2e3d8a19 3661 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
85c32340 3662 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
2e3d8a19 3663 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
324dc148 3664 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
979edd70 3665 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
2e3d8a19 3666 ::arg().set("hint-file", "If set, load root hints from this file")="";
b45eb27c 3667 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
a9af3782 3668 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
c3e753c7 3669 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
1051f8a9 3670 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
927c12b0 3671 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
1051f8a9 3672 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
54c4c0d8 3673 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
92011b8f 3674 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
ba1a571d 3675 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
49a699c4 3676 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
2c95fc65 3677 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
51e2144e 3678 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3ddb9247 3679 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
4e120339 3680 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
fde296a3 3681 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
0d5f0a9f 3682 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 3683 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
5605c067 3684 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3e61e7f7 3685 ::arg().set("lua-config-file", "More powerful configuration options")="";
644dd1da 3686
5605c067 3687 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
927c12b0
BH
3688 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3689 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
5605c067 3690 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
ac0b4eb3 3691 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3ea54bf0 3692 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
e498dac1 3693 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4485aa35 3694 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
08f3f638 3695 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3ddb9247 3696 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
35695d18 3697 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3698 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3f975863 3699 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
2fe3354d 3700 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE;
8a3a3822 3701 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
a16c4536 3702 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
e498dac1 3703 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4ca2d205 3704 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
e661a20b 3705 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
b3adda56 3706 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
00b8cadc 3707 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
a09a8ce0 3708 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
b33c2462 3709 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
aadceba8 3710 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
173d790e 3711 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
c5950146 3712 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
7c3398aa 3713 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
78227847 3714 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
a09a8ce0 3715
68e6df3c 3716 ::arg().set("include-dir","Include *.conf files from this directory")="";
d67620e4 3717 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
2332f42d 3718
3719 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
2e3d8a19 3720
d705aad9 3721 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
396f126e 3722 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
d705aad9 3723
0735b17e 3724 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
d377bb54 3725 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
0735b17e 3726
8fd25133
RG
3727 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3728
98d36505
RG
3729 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3730
5cc8371b 3731 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
59cb4a79 3732 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5cc8371b 3733
58da9034 3734 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
b47026fd
CHB
3735 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
3736 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
e97cb679 3737 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
bf6f28ca 3738
2e3d8a19 3739 ::arg().setCmd("help","Provide a helpful message");
ba1a571d 3740 ::arg().setCmd("version","Print version string");
d5141417 3741 ::arg().setCmd("config","Output blank configuration");
e6a9dde5 3742 g_log.toConsole(Logger::Info);
2e3d8a19 3743 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 3744
2d733c0f
CH
3745 string configname=::arg()["config-dir"]+"/recursor.conf";
3746 if(::arg()["config-name"]!="") {
3747 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5124de27 3748 s_programname+="-"+::arg()["config-name"];
2d733c0f
CH
3749 }
3750 cleanSlashes(configname);
5124de27 3751
5cc1ea1d
CH
3752 if(!::arg().getCommands().empty()) {
3753 cerr<<"Fatal: non-option on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl;
3754 exit(99);
3755 }
3756
577cf284
BH
3757 if(::arg().mustDo("config")) {
3758 cout<<::arg().configstring()<<endl;
3759 exit(0);
3760 }
3761
3ddb9247 3762 if(!::arg().file(configname.c_str()))
e6a9dde5 3763 g_log<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
c75a6a9e 3764
2e3d8a19 3765 ::arg().parse(argc,argv);
c836dc19 3766
f0f3f0b0 3767 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
e6a9dde5 3768 g_log<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
f0f3f0b0
PL
3769 exit(EXIT_FAILURE);
3770 }
3771
3772 if (::arg()["socket-dir"].empty()) {
3773 if (::arg()["chroot"].empty())
3774 ::arg().set("socket-dir") = LOCALSTATEDIR;
3775 else
3776 ::arg().set("socket-dir") = "/";
3777 }
3778
2e3d8a19 3779 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 3780
61d74169 3781 if(::arg().asNum("threads")==1)
3782 ::arg().set("pdns-distributes-queries")="no";
3783
2e3d8a19 3784 if(::arg().mustDo("help")) {
ff5ba4f9
WA
3785 cout<<"syntax:"<<endl<<endl;
3786 cout<<::arg().helpstring(::arg()["help"])<<endl;
3787 exit(0);
b636533b 3788 }
5e3de507 3789 if(::arg().mustDo("version")) {
ba1a571d 3790 showProductVersion();
3613a51c 3791 showBuildConfiguration();
67076869 3792 exit(0);
5e3de507 3793 }
b636533b 3794
34162f8f 3795 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
f48d7b65 3796
34162f8f
CH
3797 if (logUrgency < Logger::Error)
3798 logUrgency = Logger::Error;
f48d7b65 3799 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3800 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3801 }
e6a9dde5
PL
3802 g_log.setLoglevel(logUrgency);
3803 g_log.toConsole(logUrgency);
34162f8f 3804
f7c1d4e3 3805 serviceMain(argc, argv);
288f4aa9 3806 }
3f81d239 3807 catch(PDNSException &ae) {
e6a9dde5 3808 g_log<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 3809 ret=EXIT_FAILURE;
288f4aa9 3810 }
fdbf35ac 3811 catch(std::exception &e) {
e6a9dde5 3812 g_log<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 3813 ret=EXIT_FAILURE;
288f4aa9
BH
3814 }
3815 catch(...) {
e6a9dde5 3816 g_log<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 3817 ret=EXIT_FAILURE;
288f4aa9 3818 }
3ddb9247 3819
22030c37 3820 return ret;
288f4aa9 3821}