]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/dnsdist.cc
dnsdist: Unify the management of DNS/DNSCrypt/DoT frontends
[thirdparty/pdns.git] / pdns / dnsdist.cc
CommitLineData
24d5cb00 1/*
12471842
PL
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
5cc8371b
RG
22
23#include "config.h"
24
25#include <fstream>
26#include <getopt.h>
27#include <grp.h>
2c5db3f7 28#include <limits>
5cc8371b
RG
29#include <netinet/tcp.h>
30#include <pwd.h>
31#include <sys/resource.h>
32#include <unistd.h>
424bdfb1 33
4d39d7f3 34#if defined (__OpenBSD__) || defined(__NetBSD__)
424bdfb1
PD
35#include <readline/readline.h>
36#else
37#include <editline/readline.h>
38#endif
39
6ab65223
PL
40#ifdef HAVE_SYSTEMD
41#include <systemd/sd-daemon.h>
42#endif
43
5cc8371b
RG
44#include "dnsdist.hh"
45#include "dnsdist-cache.hh"
b5521206 46#include "dnsdist-console.hh"
5cc8371b
RG
47#include "dnsdist-ecs.hh"
48#include "dnsdist-lua.hh"
03b00917 49#include "dnsdist-rings.hh"
5d4e1ef8 50#include "dnsdist-secpoll.hh"
53c57da7 51#include "dnsdist-xpf.hh"
5cc8371b
RG
52
53#include "base64.hh"
54#include "delaypipe.hh"
55#include "dolog.hh"
56#include "dnsname.hh"
e0fd37ec 57#include "dnsparser.hh"
5cc8371b
RG
58#include "dnswriter.hh"
59#include "ednsoptions.hh"
60#include "gettime.hh"
61#include "lock.hh"
62#include "misc.hh"
63#include "sodcrypto.hh"
64#include "sstuff.hh"
519f5484 65#include "threadname.hh"
5cc8371b 66
/* Known sins:

   The receiver is currently single threaded.
   That is not *that* bad actually, but now that we are thread safe, we might want to scale.
*/
24d5cb00 72
/* The Rulaction plan:
   There is a set of Rules; if one matches, it leads to an Action.
   Both rules and actions could conceivably be Lua based.
   On the C++ side, both could be inherited from a class Rule and a class Action;
   on the Lua side we can't do that. */
78
64e4ebb4 79using std::atomic;
80using std::thread;
7730131a 81bool g_verbose;
b065f701 82
e48090d1 83struct DNSDistStats g_stats;
37a5c2d5
PO
84MetricDefinitionStorage g_metricDefinitions;
85
13fe35db 86uint16_t g_maxOutstanding{10240};
9e87dcb8 87bool g_verboseHealthChecks{false};
1ea747c0 88uint32_t g_staleCacheEntriesTTL{0};
bbfaaa6f 89bool g_syslog{true};
0dffe9e3 90bool g_allowEmptyResponse{false};
cffde2fd 91
92GlobalStateHolder<NetmaskGroup> g_ACL;
c9262563 93string g_outputBuffer;
a227f47d 94
a227f47d 95std::vector<std::shared_ptr<TLSFrontend>> g_tlslocals;
6e9fd124 96std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
87b515ed
RG
97#ifdef HAVE_EBPF
98shared_ptr<BPFFilter> g_defaultBPFFilter;
8429ad04 99std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
87b515ed 100#endif /* HAVE_EBPF */
6e9fd124 101std::vector<std::unique_ptr<ClientState>> g_frontends;
886e2cf2 102GlobalStateHolder<pools_t> g_pools;
0beaa5c8 103size_t g_udpVectorSize{1};
7c0860e1 104
9f4eb5cc
RG
105bool g_snmpEnabled{false};
106bool g_snmpTrapsEnabled{false};
107DNSDistSNMPAgent* g_snmpAgent{nullptr};
108
/* UDP: the grand design. Per socket we listen on for incoming queries there is one thread.
   Then we have a bunch of connected sockets for talking to downstream servers.
   We send directly to those sockets.

   For the return path, per downstream server we have a thread that listens to responses.

   Per socket there is an array of 2^16 states; when we send out a packet downstream, we note
   there the original requestor and the original id. The new ID is the offset in the array.

   When an answer comes in on a socket, we look up the offset by the id, and lob it to the
   original requestor.

   IDs are assigned by atomic increments of the socket offset.
 */
123
4d5959e6
RG
124GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
125GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
126GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
2d4783a8 127GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
4d5959e6 128
df111b53 129Rings g_rings;
786e4d8c 130QueryCount g_qcount;
7c0860e1 131
ecbe9133 132GlobalStateHolder<servers_t> g_dstates;
78ffa782 133GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
71c94675 134GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
dd46e5e3 135DNSAction::Action g_dynBlockAction = DNSAction::Action::Drop;
3f6d07a4
RG
136int g_tcpRecvTimeout{2};
137int g_tcpSendTimeout{2};
e0b5e49d 138int g_udpTimeout{2};
3f6d07a4 139
26a3cdb7 140bool g_servFailOnNoPolicy{false};
e72fbfc4 141bool g_truncateTC{false};
53c57da7
RG
142bool g_fixupCase{false};
143bool g_preserveTrailingData{false};
0beaa5c8 144
e0fd37ec 145static void truncateTC(char* packet, uint16_t* len, size_t responseSize, unsigned int consumed)
6ad8b29a 146try
147{
e0fd37ec
RG
148 bool hadEDNS = false;
149 uint16_t payloadSize = 0;
150 uint16_t z = 0;
151
152 if (g_addEDNSToSelfGeneratedResponses) {
153 hadEDNS = getEDNSUDPPayloadSizeAndZ(packet, *len, &payloadSize, &z);
154 }
155
2aa2c0aa 156 *len=static_cast<uint16_t>(sizeof(dnsheader)+consumed+DNS_TYPE_SIZE+DNS_CLASS_SIZE);
e7c732b8
RG
157 struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet);
158 dh->ancount = dh->arcount = dh->nscount = 0;
e0fd37ec
RG
159
160 if (hadEDNS) {
5e98ccfa 161 addEDNS(dh, *len, responseSize, z & EDNS_HEADER_FLAG_DO, payloadSize, 0);
e0fd37ec 162 }
6ad8b29a 163}
164catch(...)
165{
166 g_stats.truncFail++;
167}
168
7b3865cd 169struct DelayedPacket
170{
171 int fd;
172 string packet;
173 ComboAddress destination;
68f3eb4d 174 ComboAddress origDest;
7b3865cd 175 void operator()()
176 {
20b019fa
RG
177 ssize_t res;
178 if(origDest.sin4.sin_family == 0) {
179 res = sendto(fd, packet.c_str(), packet.size(), 0, (struct sockaddr*)&destination, destination.getSocklen());
180 }
181 else {
182 res = sendfromto(fd, packet.c_str(), packet.size(), 0, origDest, destination);
183 }
184 if (res == -1) {
185 int err = errno;
186 vinfolog("Error sending delayed response to %s: %s", destination.toStringWithPort(), strerror(err));
187 }
7b3865cd 188 }
189};
190
3e425868 191DelayPipe<DelayedPacket>* g_delay = nullptr;
7b3865cd 192
f653b8df 193void doLatencyStats(double udiff)
daacd477 194{
cb167afd
CHB
195 if(udiff < 1000) ++g_stats.latency0_1;
196 else if(udiff < 10000) ++g_stats.latency1_10;
197 else if(udiff < 50000) ++g_stats.latency10_50;
198 else if(udiff < 100000) ++g_stats.latency50_100;
199 else if(udiff < 1000000) ++g_stats.latency100_1000;
200 else ++g_stats.latencySlow;
be6c318f 201
daacd477 202 auto doAvg = [](double& var, double n, double weight) {
203 var = (weight -1) * var/weight + n/weight;
204 };
205
206 doAvg(g_stats.latencyAvg100, udiff, 100);
207 doAvg(g_stats.latencyAvg1000, udiff, 1000);
208 doAvg(g_stats.latencyAvg10000, udiff, 10000);
209 doAvg(g_stats.latencyAvg1000000, udiff, 1000000);
210}
ca404e94 211
e7c732b8 212bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed)
fcffc585 213{
fcffc585
RG
214 if (responseLen < sizeof(dnsheader)) {
215 return false;
216 }
217
3e425868 218 const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(response);
c8c3d4e4 219 if (dh->qdcount == 0) {
ad7ec9a2 220 if ((dh->rcode != RCode::NoError && dh->rcode != RCode::NXDomain) || g_allowEmptyResponse) {
c8c3d4e4
RG
221 return true;
222 }
223 else {
cb167afd 224 ++g_stats.nonCompliantResponses;
c8c3d4e4
RG
225 return false;
226 }
227 }
228
3e425868
RG
229 uint16_t rqtype, rqclass;
230 DNSName rqname;
fcffc585
RG
231 try {
232 rqname=DNSName(response, responseLen, sizeof(dnsheader), false, &rqtype, &rqclass, &consumed);
233 }
3e425868
RG
234 catch(const std::exception& e) {
235 if(responseLen > 0 && static_cast<size_t>(responseLen) > sizeof(dnsheader)) {
fcffc585 236 infolog("Backend %s sent us a response with id %d that did not parse: %s", remote.toStringWithPort(), ntohs(dh->id), e.what());
3e425868 237 }
cb167afd 238 ++g_stats.nonCompliantResponses;
fcffc585
RG
239 return false;
240 }
241
242 if (rqtype != qtype || rqclass != qclass || rqname != qname) {
243 return false;
244 }
245
246 return true;
247}
248
4ab01344 249static void restoreFlags(struct dnsheader* dh, uint16_t origFlags)
fcffc585
RG
250{
251 static const uint16_t rdMask = 1 << FLAGS_RD_OFFSET;
252 static const uint16_t cdMask = 1 << FLAGS_CD_OFFSET;
253 static const uint16_t restoreFlagsMask = UINT16_MAX & ~(rdMask | cdMask);
0f72fd5c
RG
254 uint16_t * flags = getFlagsFromDNSHeader(dh);
255 /* clear the flags we are about to restore */
256 *flags &= restoreFlagsMask;
257 /* only keep the flags we want to restore */
258 origFlags &= ~restoreFlagsMask;
259 /* set the saved flags as they were */
260 *flags |= origFlags;
261}
262
4ab01344 263static bool fixUpQueryTurnedResponse(DNSQuestion& dq, const uint16_t origFlags)
e7c732b8
RG
264{
265 restoreFlags(dq.dh, origFlags);
266
267 return addEDNSToQueryTurnedResponse(dq);
268}
269
3e425868 270static bool fixUpResponse(char** response, uint16_t* responseLen, size_t* responseSize, const DNSName& qname, uint16_t origFlags, bool ednsAdded, bool ecsAdded, std::vector<uint8_t>& rewrittenResponse, uint16_t addRoom, bool* zeroScope)
0f72fd5c 271{
fcffc585
RG
272 if (*responseLen < sizeof(dnsheader)) {
273 return false;
274 }
275
3e425868 276 struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(*response);
c8c3d4e4
RG
277 restoreFlags(dh, origFlags);
278
279 if (*responseLen == sizeof(dnsheader)) {
280 return true;
281 }
282
fcffc585
RG
283 if(g_fixupCase) {
284 string realname = qname.toDNSString();
285 if (*responseLen >= (sizeof(dnsheader) + realname.length())) {
286 memcpy(*response + sizeof(dnsheader), realname.c_str(), realname.length());
287 }
288 }
289
ff73f02b 290 if (ednsAdded || ecsAdded) {
ceae225c 291 uint16_t optStart;
fcffc585
RG
292 size_t optLen = 0;
293 bool last = false;
294
11d5d247
RG
295 const std::string responseStr(*response, *responseLen);
296 int res = locateEDNSOptRR(responseStr, &optStart, &optLen, &last);
fcffc585
RG
297
298 if (res == 0) {
49c33a6c
RG
299 if (zeroScope) { // this finds if an EDNS Client Subnet scope was set, and if it is 0
300 size_t optContentStart = 0;
301 uint16_t optContentLen = 0;
302 /* we need at least 4 bytes after the option length (family: 2, source prefix-length: 1, scope prefix-length: 1) */
303 if (isEDNSOptionInOpt(responseStr, optStart, optLen, EDNSOptionCode::ECS, &optContentStart, &optContentLen) && optContentLen >= 4) {
304 /* see if the EDNS Client Subnet SCOPE PREFIX-LENGTH byte in position 3 is set to 0, which is the only thing
305 we care about. */
306 *zeroScope = responseStr.at(optContentStart + 3) == 0;
307 }
40669042 308 }
9837850d 309
ff73f02b
RG
310 if (ednsAdded) {
311 /* we added the entire OPT RR,
312 therefore we need to remove it entirely */
313 if (last) {
314 /* simply remove the last AR */
315 *responseLen -= optLen;
316 uint16_t arcount = ntohs(dh->arcount);
317 arcount--;
318 dh->arcount = htons(arcount);
319 }
320 else {
321 /* Removing an intermediary RR could lead to compression error */
11d5d247 322 if (rewriteResponseWithoutEDNS(responseStr, rewrittenResponse) == 0) {
ff73f02b
RG
323 *responseLen = rewrittenResponse.size();
324 if (addRoom && (UINT16_MAX - *responseLen) > addRoom) {
325 rewrittenResponse.reserve(*responseLen + addRoom);
326 }
327 *responseSize = rewrittenResponse.capacity();
328 *response = reinterpret_cast<char*>(rewrittenResponse.data());
329 }
330 else {
331 warnlog("Error rewriting content");
332 }
333 }
fcffc585
RG
334 }
335 else {
ff73f02b
RG
336 /* the OPT RR was already present, but without ECS,
337 we need to remove the ECS option if any */
338 if (last) {
339 /* nothing after the OPT RR, we can simply remove the
340 ECS option */
341 size_t existingOptLen = optLen;
11d5d247 342 removeEDNSOptionFromOPT(*response + optStart, &optLen, EDNSOptionCode::ECS);
ff73f02b 343 *responseLen -= (existingOptLen - optLen);
fcffc585
RG
344 }
345 else {
ff73f02b 346 /* Removing an intermediary RR could lead to compression error */
11d5d247 347 if (rewriteResponseWithoutEDNSOption(responseStr, EDNSOptionCode::ECS, rewrittenResponse) == 0) {
ff73f02b
RG
348 *responseLen = rewrittenResponse.size();
349 if (addRoom && (UINT16_MAX - *responseLen) > addRoom) {
350 rewrittenResponse.reserve(*responseLen + addRoom);
351 }
352 *responseSize = rewrittenResponse.capacity();
353 *response = reinterpret_cast<char*>(rewrittenResponse.data());
354 }
355 else {
356 warnlog("Error rewriting content");
357 }
fcffc585
RG
358 }
359 }
360 }
361 }
362
363 return true;
364}
365
#ifdef HAVE_DNSCRYPT
/* Encrypts the response in place when the query came in over DNSCrypt.
   Returns true when there is nothing to encrypt (dnsCryptQuery is empty) or when
   the encryption succeeded, in which case *responseLen holds the encrypted length.
   Returns false when the encryption failed and the response has to be dropped.
   If dh, *dh and dhCopy are all set, the header is copied into dhCopy first
   and *dh is redirected to that copy. */
static bool encryptResponse(char* response, uint16_t* responseLen, size_t responseSize, bool tcp, std::shared_ptr<DNSCryptQuery> dnsCryptQuery, dnsheader** dh, dnsheader* dhCopy)
{
  if (!dnsCryptQuery) {
    return true;
  }

  /* save the original header before encrypting it in place */
  const bool wantsHeaderCopy = dh != nullptr && *dh != nullptr && dhCopy != nullptr;
  if (wantsHeaderCopy) {
    memcpy(dhCopy, *dh, sizeof(dnsheader));
    *dh = dhCopy;
  }

  uint16_t encryptedLen = 0;
  if (dnsCryptQuery->encryptResponse(response, *responseLen, responseSize, tcp, &encryptedLen) != 0) {
    /* dropping response */
    vinfolog("Error encrypting the response, dropping.");
    return false;
  }

  *responseLen = encryptedLen;
  return true;
}
#endif /* HAVE_DNSCRYPT */
fcffc585 390
3e425868
RG
391static bool applyRulesToResponse(LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr)
392{
393 DNSResponseAction::Action action=DNSResponseAction::Action::None;
394 std::string ruleresult;
395 for(const auto& lr : *localRespRulactions) {
396 if(lr.d_rule->matches(&dr)) {
397 lr.d_rule->d_matches++;
398 action=(*lr.d_action)(&dr, &ruleresult);
399 switch(action) {
400 case DNSResponseAction::Action::Allow:
401 return true;
402 break;
403 case DNSResponseAction::Action::Drop:
404 return false;
405 break;
406 case DNSResponseAction::Action::HeaderModify:
407 return true;
408 break;
409 case DNSResponseAction::Action::ServFail:
410 dr.dh->rcode = RCode::ServFail;
411 return true;
412 break;
413 /* non-terminal actions follow */
414 case DNSResponseAction::Action::Delay:
415 dr.delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry
416 break;
417 case DNSResponseAction::Action::None:
418 break;
419 }
420 }
421 }
422
423 return true;
424}
425
426bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted)
427{
428 if (!applyRulesToResponse(localRespRulactions, dr)) {
429 return false;
430 }
431
432 bool zeroScope = false;
433 if (!fixUpResponse(response, responseLen, responseSize, *dr.qname, dr.origFlags, dr.ednsAdded, dr.ecsAdded, rewrittenResponse, addRoom, dr.useZeroScope ? &zeroScope : nullptr)) {
434 return false;
435 }
436
437 if (dr.packetCache && !dr.skipCache) {
438 if (!dr.useZeroScope) {
439 /* if the query was not suitable for zero-scope, for
440 example because it had an existing ECS entry so the hash is
441 not really 'no ECS', so just insert it for the existing subnet
442 since:
443 - we don't have the correct hash for a non-ECS query
444 - inserting with hash computed before the ECS replacement but with
445 the subnet extracted _after_ the replacement would not work.
446 */
447 zeroScope = false;
448 }
449 // if zeroScope, pass the pre-ECS hash-key and do not pass the subnet to the cache
450 dr.packetCache->insert(zeroScope ? dr.cacheKeyNoECS : dr.cacheKey, zeroScope ? boost::none : dr.subnet, dr.origFlags, dr.dnssecOK, *dr.qname, dr.qtype, dr.qclass, *response, *responseLen, dr.tcp, dr.dh->rcode, dr.tempFailureTTL);
451 }
452
453#ifdef HAVE_DNSCRYPT
454 if (!muted) {
455 if (!encryptResponse(*response, responseLen, *responseSize, dr.tcp, dr.dnsCryptQuery, nullptr, nullptr)) {
456 return false;
457 }
458 }
7129b5c4 459#endif /* HAVE_DNSCRYPT */
3e425868
RG
460
461 return true;
462}
463
464static bool sendUDPResponse(int origFD, const char* response, const uint16_t responseLen, const int delayMsec, const ComboAddress& origDest, const ComboAddress& origRemote)
0f72fd5c 465{
fcffc585
RG
466 if(delayMsec && g_delay) {
467 DelayedPacket dp{origFD, string(response,responseLen), origRemote, origDest};
468 g_delay->submit(dp, delayMsec);
469 }
470 else {
20b019fa
RG
471 ssize_t res;
472 if(origDest.sin4.sin_family == 0) {
3e425868 473 res = sendto(origFD, response, responseLen, 0, reinterpret_cast<const struct sockaddr*>(&origRemote), origRemote.getSocklen());
20b019fa
RG
474 }
475 else {
476 res = sendfromto(origFD, response, responseLen, 0, origDest, origRemote);
477 }
478 if (res == -1) {
479 int err = errno;
480 vinfolog("Error sending response to %s: %s", origRemote.toStringWithPort(), strerror(err));
481 }
fcffc585
RG
482 }
483
484 return true;
485}
486
150105a2 487
4ab01344 488static int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state)
150105a2 489{
5bdbb83d 490 return state->sockets[state->socketsOffset++ % state->sockets.size()];
150105a2
RG
491}
492
5bdbb83d 493static void pickBackendSocketsReadyForReceiving(const std::shared_ptr<DownstreamState>& state, std::vector<int>& ready)
150105a2 494{
5bdbb83d 495 ready.clear();
150105a2 496
5bdbb83d
RG
497 if (state->sockets.size() == 1) {
498 ready.push_back(state->sockets[0]);
499 return ;
150105a2
RG
500 }
501
5bdbb83d
RG
502 {
503 std::lock_guard<std::mutex> lock(state->socketsLock);
504 state->mplexer->getAvailableFDs(ready, -1);
150105a2 505 }
150105a2
RG
506}
507
4632045b 508// listens on a dedicated socket, lobs answers from downstream servers to original requestors
9b73b71c 509void responderThread(std::shared_ptr<DownstreamState> dss)
66d1c0bf 510try {
519f5484 511 setThreadName("dnsdist/respond");
d8c19b98 512 auto localRespRulactions = g_resprulactions.getLocal();
11e1e08b 513 char packet[4096 + DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE];
b50ee135 514 static_assert(sizeof(packet) <= UINT16_MAX, "Packet size should fit in a uint16_t");
3e425868
RG
515 /* when the answer is encrypted in place, we need to get a copy
516 of the original header before encryption to fill the ring buffer */
517 dnsheader cleartextDH;
ca404e94 518 vector<uint8_t> rewrittenResponse;
7267213a 519
66d1c0bf 520 uint16_t queryId = 0;
5bdbb83d
RG
521 std::vector<int> sockets;
522 sockets.reserve(dss->sockets.size());
523
7730131a 524 for(;;) {
57847d65 525 dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet);
66d1c0bf 526 try {
5bdbb83d
RG
527 pickBackendSocketsReadyForReceiving(dss, sockets);
528 for (const auto& fd : sockets) {
529 ssize_t got = recv(fd, packet, sizeof(packet), 0);
530 char * response = packet;
531 size_t responseSize = sizeof(packet);
ca404e94 532
2aa2c0aa 533 if (got < 0 || static_cast<size_t>(got) < sizeof(dnsheader))
5bdbb83d 534 continue;
4f380af3 535
3e425868 536 uint16_t responseLen = static_cast<uint16_t>(got);
5bdbb83d 537 queryId = dh->id;
b50ee135 538
5bdbb83d
RG
539 if(queryId >= dss->idStates.size())
540 continue;
4f380af3 541
5bdbb83d
RG
542 IDState* ids = &dss->idStates[queryId];
543 int origFD = ids->origFD;
c9ba8478 544
5bdbb83d
RG
545 if(origFD < 0) // duplicate
546 continue;
7267213a 547
5bdbb83d
RG
548 /* setting age to 0 to prevent the maintainer thread from
549 cleaning this IDS while we process the response.
550 We have already a copy of the origFD, so it would
551 mostly mess up the outstanding counter.
552 */
553 ids->age = 0;
fcffc585 554
e7c732b8
RG
555 unsigned int consumed = 0;
556 if (!responseContentMatches(response, responseLen, ids->qname, ids->qtype, ids->qclass, dss->remote, consumed)) {
5bdbb83d
RG
557 continue;
558 }
7267213a 559
71b86bd8
RG
560 int oldFD = ids->origFD.exchange(-1);
561 if (oldFD == origFD) {
562 /* we only decrement the outstanding counter if the value was not
563 altered in the meantime, which would mean that the state has been actively reused
564 and the other thread has not incremented the outstanding counter, so we don't
565 want it to be decremented twice. */
566 --dss->outstanding; // you'd think an attacker could game this, but we're using connected socket
567 }
2c5db3f7 568
5bdbb83d 569 if(dh->tc && g_truncateTC) {
e0fd37ec 570 truncateTC(response, &responseLen, responseSize, consumed);
5bdbb83d 571 }
aeb36780 572
5bdbb83d 573 dh->id = ids->origID;
ca404e94 574
5bdbb83d 575 uint16_t addRoom = 0;
d0ae6360
RG
576 DNSResponse dr = makeDNSResponseFromIDState(*ids, dh, sizeof(packet), responseLen, false);
577 if (dr.dnsCryptQuery) {
5bdbb83d
RG
578 addRoom = DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE;
579 }
ca404e94 580
3e425868
RG
581 memcpy(&cleartextDH, dr.dh, sizeof(cleartextDH));
582 if (!processResponse(&response, &responseLen, &responseSize, localRespRulactions, dr, addRoom, rewrittenResponse, ids->cs && ids->cs->muted)) {
583 continue;
5bdbb83d 584 }
886e2cf2 585
5bdbb83d 586 if (ids->cs && !ids->cs->muted) {
5bdbb83d
RG
587 ComboAddress empty;
588 empty.sin4.sin_family = 0;
589 /* if ids->destHarvested is false, origDest holds the listening address.
590 We don't want to use that as a source since it could be 0.0.0.0 for example. */
3e425868 591 sendUDPResponse(origFD, response, responseLen, dr.delayMsec, ids->destHarvested ? ids->origDest : empty, ids->origRemote);
5bdbb83d 592 }
66d1c0bf 593
cb167afd 594 ++g_stats.responses;
66d1c0bf 595
5bdbb83d 596 double udiff = ids->sentTime.udiff();
3e425868 597 vinfolog("Got answer from %s, relayed to %s, took %f usec", dss->remote.toStringWithPort(), dr.remote->toStringWithPort(), udiff);
66d1c0bf 598
01f6920b
RG
599 struct timespec ts;
600 gettime(&ts);
3e425868 601 g_rings.insertResponse(ts, *dr.remote, *dr.qname, dr.qtype, static_cast<unsigned int>(udiff), static_cast<unsigned int>(got), cleartextDH, dss->remote);
66d1c0bf 602
61d10a4d
MH
603 switch (dh->rcode) {
604 case RCode::NXDomain:
605 ++g_stats.frontendNXDomain;
606 break;
607 case RCode::ServFail:
cb167afd 608 ++g_stats.servfailResponses;
61d10a4d
MH
609 ++g_stats.frontendServFail;
610 break;
611 case RCode::NoError:
612 ++g_stats.frontendNoError;
613 break;
cb167afd 614 }
5bdbb83d 615 dss->latencyUsec = (127.0 * dss->latencyUsec / 128.0) + udiff/128.0;
66d1c0bf 616
5bdbb83d 617 doLatencyStats(udiff);
fcadd56e 618
5bdbb83d
RG
619 rewrittenResponse.clear();
620 }
66d1c0bf 621 }
df560083 622 catch(const std::exception& e){
cd7fa253 623 vinfolog("Got an error in UDP responder thread while parsing a response from %s, id %d: %s", dss->remote.toStringWithPort(), queryId, e.what());
66d1c0bf 624 }
7730131a 625 }
7730131a 626}
66d1c0bf
RG
627catch(const std::exception& e)
628{
629 errlog("UDP responder thread died because of exception: %s", e.what());
66d1c0bf
RG
630}
631catch(const PDNSException& e)
632{
633 errlog("UDP responder thread died because of PowerDNS exception: %s", e.reason);
66d1c0bf
RG
634}
635catch(...)
636{
637 errlog("UDP responder thread died because of an exception: %s", "unknown");
66d1c0bf 638}
24d5cb00 639
5d7e6765 640bool DownstreamState::reconnect()
3c115e0f 641{
5d7e6765
RG
642 std::unique_lock<std::mutex> tl(connectLock, std::try_to_lock);
643 if (!tl.owns_lock()) {
644 /* we are already reconnecting */
645 return false;
646 }
647
b58f08e5 648 connected = false;
5bdbb83d 649 for (auto& fd : sockets) {
150105a2 650 if (fd != -1) {
5d7e6765 651 if (sockets.size() > 1) {
5bdbb83d
RG
652 std::lock_guard<std::mutex> lock(socketsLock);
653 mplexer->removeReadFD(fd);
654 }
150105a2
RG
655 /* shutdown() is needed to wake up recv() in the responderThread */
656 shutdown(fd, SHUT_RDWR);
657 close(fd);
658 fd = -1;
f99e3aaf 659 }
150105a2
RG
660 if (!IsAnyAddress(remote)) {
661 fd = SSocket(remote.sin4.sin_family, SOCK_DGRAM, 0);
662 if (!IsAnyAddress(sourceAddr)) {
663 SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1);
664 SBind(fd, sourceAddr);
665 }
666 try {
667 SConnect(fd, remote);
5d7e6765 668 if (sockets.size() > 1) {
5bdbb83d
RG
669 std::lock_guard<std::mutex> lock(socketsLock);
670 mplexer->addReadFD(fd, [](int, boost::any) {});
671 }
150105a2
RG
672 connected = true;
673 }
674 catch(const std::runtime_error& error) {
675 infolog("Error connecting to new server with address %s: %s", remote.toStringWithPort(), error.what());
38069e7e
RG
676 connected = false;
677 break;
678 }
7565f4e6 679 }
38069e7e
RG
680 }
681
682 /* if at least one (re-)connection failed, close all sockets */
683 if (!connected) {
5bdbb83d 684 for (auto& fd : sockets) {
38069e7e 685 if (fd != -1) {
5d7e6765
RG
686 if (sockets.size() > 1) {
687 std::lock_guard<std::mutex> lock(socketsLock);
688 mplexer->removeReadFD(fd);
689 }
38069e7e
RG
690 /* shutdown() is needed to wake up recv() in the responderThread */
691 shutdown(fd, SHUT_RDWR);
692 close(fd);
693 fd = -1;
150105a2 694 }
7565f4e6 695 }
b58f08e5 696 }
5d7e6765
RG
697
698 return connected;
b58f08e5 699}
f2caf657
CHB
700void DownstreamState::hash()
701{
d58e616a 702 vinfolog("Computing hashes for id=%s and weight=%d", id, weight);
f2caf657 703 auto w = weight;
d58e616a 704 WriteLock wl(&d_lock);
f2caf657
CHB
705 hashes.clear();
706 while (w > 0) {
707 std::string uuid = boost::str(boost::format("%s-%d") % id % w);
708 unsigned int wshash = burtleCI((const unsigned char*)uuid.c_str(), uuid.size(), g_hashperturb);
709 hashes.insert(wshash);
710 --w;
711 }
712}
713
714void DownstreamState::setId(const boost::uuids::uuid& newId)
715{
716 id = newId;
d58e616a
CHB
717 // compute hashes only if already done
718 if (!hashes.empty()) {
719 hash();
720 }
f2caf657
CHB
721}
722
723void DownstreamState::setWeight(int newWeight)
724{
5a2bbe8c
CHB
725 if (newWeight < 1) {
726 errlog("Error setting server's weight: downstream weight value must be greater than 0.");
727 return ;
728 }
f2caf657 729 weight = newWeight;
d58e616a
CHB
730 if (!hashes.empty()) {
731 hash();
732 }
f2caf657 733}
b58f08e5 734
150105a2 735DownstreamState::DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf_, size_t numberOfSockets): remote(remote_), sourceAddr(sourceAddr_), sourceItf(sourceItf_)
b58f08e5 736{
d58e616a 737 pthread_rwlock_init(&d_lock, nullptr);
d61aa945 738 id = getUniqueID();
5d7e6765
RG
739 threadStarted.clear();
740
5bdbb83d
RG
741 mplexer = std::unique_ptr<FDMultiplexer>(FDMultiplexer::getMultiplexerSilent());
742
743 sockets.resize(numberOfSockets);
744 for (auto& fd : sockets) {
150105a2
RG
745 fd = -1;
746 }
747
b58f08e5
RG
748 if (!IsAnyAddress(remote)) {
749 reconnect();
f99e3aaf
RG
750 idStates.resize(g_maxOutstanding);
751 sw.start();
752 infolog("Added downstream server %s", remote.toStringWithPort());
fbe2a2e0 753 }
1720247e 754
3c115e0f 755}
756
773470ca 757std::mutex g_luamutex;
3f25bce6 758LuaContext g_lua;
3f25bce6 759
ecbe9133 760GlobalStateHolder<ServerPolicy> g_policy;
773470ca 761
497a6e3a 762shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq)
773470ca 763{
22b2b326 764 for(auto& d : servers) {
da4e7813 765 if(d.second->isUp() && d.second->qps.check())
766 return d.second;
773470ca 767 }
497a6e3a 768 return leastOutstanding(servers, dq);
2c5db3f7 769}
770
d1fba58e 771// get server with least outstanding queries, and within those, with the lowest order, and within those: the fastest
497a6e3a 772shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq)
c9262563 773{
886e2cf2
RG
774 if (servers.size() == 1 && servers[0].second->isUp()) {
775 return servers[0].second;
776 }
777
d1fba58e 778 vector<pair<tuple<int,int,double>, shared_ptr<DownstreamState>>> poss;
779 /* so you might wonder, why do we go through this trouble? The data on which we sort could change during the sort,
780 which would suck royally and could even lead to crashes. So first we snapshot on what we sort, and then we sort */
478ce172 781 poss.reserve(servers.size());
0e41337b 782 for(auto& d : servers) {
da4e7813 783 if(d.second->isUp()) {
d1fba58e 784 poss.push_back({make_tuple(d.second->outstanding.load(), d.second->order, d.second->latencyUsec), d.second});
c9262563 785 }
786 }
787 if(poss.empty())
788 return shared_ptr<DownstreamState>();
789 nth_element(poss.begin(), poss.begin(), poss.end(), [](const decltype(poss)::value_type& a, const decltype(poss)::value_type& b) { return a.first < b.first; });
790 return poss.begin()->second;
791}
792
497a6e3a 793shared_ptr<DownstreamState> valrandom(unsigned int val, const NumberedServerVector& servers, const DNSQuestion* dq)
c9262563 794{
795 vector<pair<int, shared_ptr<DownstreamState>>> poss;
278403d3
DM
796 int sum = 0;
797 int max = std::numeric_limits<int>::max();
798
22b2b326 799 for(auto& d : servers) { // w=1, w=10 -> 1, 11
da4e7813 800 if(d.second->isUp()) {
278403d3
DM
801 // Don't overflow sum when adding high weights
802 if(d.second->weight > max - sum) {
803 sum = max;
804 } else {
805 sum += d.second->weight;
806 }
807
da4e7813 808 poss.push_back({sum, d.second});
c9262563 809 }
810 }
d2894425
NJ
811
812 // Catch poss & sum are empty to avoid SIGFPE
813 if(poss.empty())
814 return shared_ptr<DownstreamState>();
815
a7f3108c 816 int r = val % sum;
af619119 817 auto p = upper_bound(poss.begin(), poss.end(),r, [](int r_, const decltype(poss)::value_type& a) { return r_ < a.first;});
c9262563 818 if(p==poss.end())
819 return shared_ptr<DownstreamState>();
820 return p->second;
821}
822
497a6e3a 823shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq)
a7f3108c 824{
497a6e3a 825 return valrandom(random(), servers, dq);
a7f3108c 826}
827
36e763fa 828uint32_t g_hashperturb;
497a6e3a 829shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq)
a7f3108c 830{
497a6e3a 831 return valrandom(dq->qname->hash(g_hashperturb), servers, dq);
a7f3108c 832}
833
1720247e
CHB
834shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq)
835{
1720247e 836 unsigned int qhash = dq->qname->hash(g_hashperturb);
b712c51e
CHB
837 unsigned int sel = std::numeric_limits<unsigned int>::max();
838 unsigned int min = std::numeric_limits<unsigned int>::max();
839 shared_ptr<DownstreamState> ret = nullptr, first = nullptr;
1720247e
CHB
840
841 for (const auto& d: servers) {
842 if (d.second->isUp()) {
93ca495f 843 // make sure hashes have been computed
d58e616a
CHB
844 if (d.second->hashes.empty()) {
845 d.second->hash();
846 }
847 {
848 ReadLock rl(&(d.second->d_lock));
93ca495f
CHB
849 const auto& server = d.second;
850 // we want to keep track of the last hash
b712c51e
CHB
851 if (min > *(server->hashes.begin())) {
852 min = *(server->hashes.begin());
853 first = server;
93ca495f 854 }
b712c51e
CHB
855
856 auto hash_it = server->hashes.lower_bound(qhash);
857 if (hash_it != server->hashes.end()) {
858 if (*hash_it < sel) {
93ca495f
CHB
859 sel = *hash_it;
860 ret = server;
861 }
d58e616a 862 }
1720247e
CHB
863 }
864 }
865 }
93ca495f
CHB
866 if (ret != nullptr) {
867 return ret;
1720247e 868 }
b712c51e
CHB
869 if (first != nullptr) {
870 return first;
1720247e 871 }
93ca495f 872 return shared_ptr<DownstreamState>();
1720247e 873}
a7f3108c 874
497a6e3a 875shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq)
5f504638 876{
da4e7813 877 NumberedServerVector poss;
c9262563 878
22b2b326 879 for(auto& d : servers) {
da4e7813 880 if(d.second->isUp()) {
5f504638 881 poss.push_back(d);
c9262563 882 }
5f504638 883 }
c9262563 884
ecbe9133 885 const auto *res=&poss;
5f504638 886 if(poss.empty())
ecbe9133 887 res = &servers;
c9262563 888
889 if(res->empty())
890 return shared_ptr<DownstreamState>();
891
892 static unsigned int counter;
893
da4e7813 894 return (*res)[(counter++) % res->size()].second;
5f504638 895}
896
e6841c9e 897ComboAddress g_serverControl{"127.0.0.1:5199"};
b076b34a 898
886e2cf2
RG
899std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName)
900{
901 std::shared_ptr<ServerPool> pool;
902 pools_t::iterator it = pools.find(poolName);
903 if (it != pools.end()) {
904 pool = it->second;
905 }
906 else {
8f4f5186
RG
907 if (!poolName.empty())
908 vinfolog("Creating pool %s", poolName);
886e2cf2
RG
909 pool = std::make_shared<ServerPool>();
910 pools.insert(std::pair<std::string,std::shared_ptr<ServerPool> >(poolName, pool));
911 }
912 return pool;
913}
78c8047b 914
742c079a
RG
915void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy)
916{
917 std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName);
918 if (!poolName.empty()) {
919 vinfolog("Setting pool %s server selection policy to %s", poolName, policy->name);
920 } else {
921 vinfolog("Setting default pool server selection policy to %s", policy->name);
922 }
923 pool->policy = policy;
924}
925
886e2cf2 926void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server)
22b2b326 927{
886e2cf2 928 std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName);
c218b223 929 if (!poolName.empty()) {
8f4f5186 930 vinfolog("Adding server to pool %s", poolName);
c218b223 931 } else {
8f4f5186 932 vinfolog("Adding server to default pool");
c218b223 933 }
a1b1a29d 934 pool->addServer(server);
886e2cf2
RG
935}
936
937void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server)
938{
8f4f5186 939 std::shared_ptr<ServerPool> pool = getPool(pools, poolName);
886e2cf2 940
c218b223 941 if (!poolName.empty()) {
8f4f5186 942 vinfolog("Removing server from pool %s", poolName);
c218b223 943 }
944 else {
8f4f5186 945 vinfolog("Removing server from default pool");
c218b223 946 }
886e2cf2 947
a1b1a29d 948 pool->removeServer(server);
886e2cf2
RG
949}
950
951std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName)
952{
953 pools_t::const_iterator it = pools.find(poolName);
954
955 if (it == pools.end()) {
956 throw std::out_of_range("No pool named " + poolName);
957 }
958
959 return it->second;
960}
961
a1b1a29d 962NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName)
886e2cf2
RG
963{
964 std::shared_ptr<ServerPool> pool = getPool(pools, poolName);
a1b1a29d 965 return pool->getServers();
22b2b326 966}
c9262563 967
614239d0 968static void spoofResponseFromString(DNSQuestion& dq, const string& spoofContent)
7791f83a
RG
969{
970 string result;
614239d0
RG
971
972 std::vector<std::string> addrs;
973 stringtok(addrs, spoofContent, " ,");
974
975 if (addrs.size() == 1) {
976 try {
977 ComboAddress spoofAddr(spoofContent);
978 SpoofAction sa({spoofAddr});
979 sa(&dq, &result);
980 }
981 catch(const PDNSException &e) {
982 SpoofAction sa(spoofContent); // CNAME then
983 sa(&dq, &result);
984 }
985 } else {
986 std::vector<ComboAddress> cas;
987 for (const auto& addr : addrs) {
988 try {
989 cas.push_back(ComboAddress(addr));
990 }
991 catch (...) {
992 }
993 }
994 SpoofAction sa(cas);
6ca7a40a 995 sa(&dq, &result);
7791f83a
RG
996 }
997}
998
4ab01344 999static bool applyRulesToQuery(LocalHolders& holders, DNSQuestion& dq, string& poolname, const struct timespec& now)
e91084ce 1000{
4ab01344 1001 g_rings.insertQuery(now, *dq.remote, *dq.qname, dq.qtype, dq.len, *dq.dh);
e91084ce 1002
786e4d8c
RS
1003 if(g_qcount.enabled) {
1004 string qname = (*dq.qname).toString(".");
1005 bool countQuery{true};
1006 if(g_qcount.filter) {
1007 std::lock_guard<std::mutex> lock(g_luamutex);
1008 std::tie (countQuery, qname) = g_qcount.filter(dq);
1009 }
1010
1011 if(countQuery) {
1012 WriteLock wl(&g_qcount.queryLock);
1013 if(!g_qcount.records.count(qname)) {
1014 g_qcount.records[qname] = 0;
1015 }
1016 g_qcount.records[qname]++;
1017 }
1018 }
1019
0beaa5c8 1020 if(auto got = holders.dynNMGBlock->lookup(*dq.remote)) {
701f690b 1021 auto updateBlockStats = [&got]() {
cb167afd 1022 ++g_stats.dynBlocked;
701f690b 1023 got->second.blocks++;
1024 };
1025
e91084ce 1026 if(now < got->second.until) {
7b925432
RG
1027 DNSAction::Action action = got->second.action;
1028 if (action == DNSAction::Action::None) {
1029 action = g_dynBlockAction;
1030 }
477c86a0
RG
1031 switch (action) {
1032 case DNSAction::Action::NoOp:
1033 /* do nothing */
1034 break;
79ee8ff9
RG
1035
1036 case DNSAction::Action::Nxdomain:
1037 vinfolog("Query from %s turned into NXDomain because of dynamic block", dq.remote->toStringWithPort());
1038 updateBlockStats();
1039
1040 dq.dh->rcode = RCode::NXDomain;
1041 dq.dh->qr=true;
1042 return true;
1043
477c86a0 1044 case DNSAction::Action::Refused:
dd46e5e3 1045 vinfolog("Query from %s refused because of dynamic block", dq.remote->toStringWithPort());
701f690b 1046 updateBlockStats();
8477236d 1047
dd46e5e3 1048 dq.dh->rcode = RCode::Refused;
79ee8ff9 1049 dq.dh->qr = true;
dd46e5e3 1050 return true;
477c86a0
RG
1051
1052 case DNSAction::Action::Truncate:
8477236d 1053 if(!dq.tcp) {
701f690b 1054 updateBlockStats();
8477236d 1055 vinfolog("Query from %s truncated because of dynamic block", dq.remote->toStringWithPort());
1056 dq.dh->tc = true;
1057 dq.dh->qr = true;
1058 return true;
1059 }
1060 else {
1061 vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString());
1062 }
477c86a0 1063 break;
3d60b39a 1064 case DNSAction::Action::NoRecurse:
1065 updateBlockStats();
1066 vinfolog("Query from %s setting rd=0 because of dynamic block", dq.remote->toStringWithPort());
1067 dq.dh->rd = false;
1068 return true;
477c86a0 1069 default:
701f690b 1070 updateBlockStats();
dd46e5e3
RG
1071 vinfolog("Query from %s dropped because of dynamic block", dq.remote->toStringWithPort());
1072 return false;
1073 }
e91084ce
RG
1074 }
1075 }
1076
0beaa5c8 1077 if(auto got = holders.dynSMTBlock->lookup(*dq.qname)) {
701f690b 1078 auto updateBlockStats = [&got]() {
cb167afd 1079 ++g_stats.dynBlocked;
701f690b 1080 got->blocks++;
1081 };
1082
71c94675 1083 if(now < got->until) {
7b925432
RG
1084 DNSAction::Action action = got->action;
1085 if (action == DNSAction::Action::None) {
1086 action = g_dynBlockAction;
1087 }
477c86a0
RG
1088 switch (action) {
1089 case DNSAction::Action::NoOp:
1090 /* do nothing */
1091 break;
79ee8ff9
RG
1092 case DNSAction::Action::Nxdomain:
1093 vinfolog("Query from %s for %s turned into NXDomain because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString());
1094 updateBlockStats();
1095
1096 dq.dh->rcode = RCode::NXDomain;
1097 dq.dh->qr=true;
1098 return true;
477c86a0 1099 case DNSAction::Action::Refused:
dd46e5e3 1100 vinfolog("Query from %s for %s refused because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString());
701f690b 1101 updateBlockStats();
8477236d 1102
dd46e5e3
RG
1103 dq.dh->rcode = RCode::Refused;
1104 dq.dh->qr=true;
1105 return true;
477c86a0 1106 case DNSAction::Action::Truncate:
8477236d 1107 if(!dq.tcp) {
701f690b 1108 updateBlockStats();
8477236d 1109
1110 vinfolog("Query from %s for %s truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString());
1111 dq.dh->tc = true;
1112 dq.dh->qr = true;
1113 return true;
1114 }
1115 else {
1116 vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString());
1117 }
477c86a0 1118 break;
3d60b39a 1119 case DNSAction::Action::NoRecurse:
1120 updateBlockStats();
1121 vinfolog("Query from %s setting rd=0 because of dynamic block", dq.remote->toStringWithPort());
1122 dq.dh->rd = false;
1123 return true;
477c86a0 1124 default:
701f690b 1125 updateBlockStats();
dd46e5e3
RG
1126 vinfolog("Query from %s for %s dropped because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString());
1127 return false;
1128 }
71c94675 1129 }
1130 }
1131
e91084ce
RG
1132 DNSAction::Action action=DNSAction::Action::None;
1133 string ruleresult;
0beaa5c8 1134 for(const auto& lr : *holders.rulactions) {
4d5959e6
RG
1135 if(lr.d_rule->matches(&dq)) {
1136 lr.d_rule->d_matches++;
1137 action=(*lr.d_action)(&dq, &ruleresult);
e91084ce
RG
1138
1139 switch(action) {
1140 case DNSAction::Action::Allow:
1141 return true;
1142 break;
1143 case DNSAction::Action::Drop:
cb167afd 1144 ++g_stats.ruleDrop;
e91084ce
RG
1145 return false;
1146 break;
1147 case DNSAction::Action::Nxdomain:
1148 dq.dh->rcode = RCode::NXDomain;
1149 dq.dh->qr=true;
cb167afd 1150 ++g_stats.ruleNXDomain;
e91084ce
RG
1151 return true;
1152 break;
dd46e5e3
RG
1153 case DNSAction::Action::Refused:
1154 dq.dh->rcode = RCode::Refused;
1155 dq.dh->qr=true;
cb167afd 1156 ++g_stats.ruleRefused;
dd46e5e3
RG
1157 return true;
1158 break;
5f23eb98
CH
1159 case DNSAction::Action::ServFail:
1160 dq.dh->rcode = RCode::ServFail;
1161 dq.dh->qr=true;
cb167afd 1162 ++g_stats.ruleServFail;
5f23eb98
CH
1163 return true;
1164 break;
e91084ce
RG
1165 case DNSAction::Action::Spoof:
1166 spoofResponseFromString(dq, ruleresult);
1167 return true;
1168 break;
c4f5aeff
RG
1169 case DNSAction::Action::Truncate:
1170 dq.dh->tc = true;
1171 dq.dh->qr = true;
1172 return true;
1173 break;
e91084ce
RG
1174 case DNSAction::Action::HeaderModify:
1175 return true;
1176 break;
1177 case DNSAction::Action::Pool:
1178 poolname=ruleresult;
1179 return true;
1180 break;
1181 /* non-terminal actions follow */
1182 case DNSAction::Action::Delay:
4ab01344 1183 dq.delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry
e91084ce
RG
1184 break;
1185 case DNSAction::Action::None:
477c86a0
RG
1186 /* fall-through */
1187 case DNSAction::Action::NoOp:
e91084ce 1188 break;
3d60b39a 1189 case DNSAction::Action::NoRecurse:
1190 dq.dh->rd = false;
1191 return true;
1192 break;
e91084ce
RG
1193 }
1194 }
1195 }
1196
1197 return true;
1198}
1199
4ab01344 1200static ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false)
fbe2a2e0 1201{
b58f08e5
RG
1202 ssize_t result;
1203
fbe2a2e0 1204 if (ss->sourceItf == 0) {
b58f08e5
RG
1205 result = send(sd, request, requestLen, 0);
1206 }
1207 else {
1208 struct msghdr msgh;
1209 struct iovec iov;
1210 char cbuf[256];
a2353842
RG
1211 ComboAddress remote(ss->remote);
1212 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), const_cast<char*>(request), requestLen, &remote);
b58f08e5
RG
1213 addCMsgSrcAddr(&msgh, cbuf, &ss->sourceAddr, ss->sourceItf);
1214 result = sendmsg(sd, &msgh, 0);
fbe2a2e0
RG
1215 }
1216
b58f08e5
RG
1217 if (result == -1) {
1218 int savederrno = errno;
1219 vinfolog("Error sending request to backend %s: %d", ss->remote.toStringWithPort(), savederrno);
1220
1221 /* This might sound silly, but on Linux send() might fail with EINVAL
1b126225
RG
1222 if the interface the socket was bound to doesn't exist anymore.
1223 We don't want to reconnect the real socket if the healthcheck failed,
1224 because it's not using the same socket.
1225 */
1226 if (!healthCheck && (savederrno == EINVAL || savederrno == ENODEV)) {
b58f08e5
RG
1227 ss->reconnect();
1228 }
fbe2a2e0
RG
1229 }
1230
b58f08e5 1231 return result;
fbe2a2e0
RG
1232}
1233
0beaa5c8 1234static bool isUDPQueryAcceptable(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest)
24d5cb00 1235{
0beaa5c8
RG
1236 if (msgh->msg_flags & MSG_TRUNC) {
1237 /* message was too large for our buffer */
1238 vinfolog("Dropping message too large for our buffer");
cb167afd 1239 ++g_stats.nonCompliantQueries;
0beaa5c8
RG
1240 return false;
1241 }
a4652d55 1242
0beaa5c8
RG
1243 if(!holders.acl->match(remote)) {
1244 vinfolog("Query from %s dropped because of ACL", remote.toStringWithPort());
cb167afd 1245 ++g_stats.aclDrops;
0beaa5c8 1246 return false;
2b3eefc3 1247 }
2b3eefc3 1248
0beaa5c8 1249 cs.queries++;
cb167afd 1250 ++g_stats.queries;
2b3eefc3 1251
0beaa5c8
RG
1252 if (HarvestDestinationAddress(msgh, &dest)) {
1253 /* we don't get the port, only the address */
1254 dest.sin4.sin_port = cs.local.sin4.sin_port;
1255 }
1256 else {
1257 dest.sin4.sin_family = 0;
2b3eefc3 1258 }
549d63c9 1259
0beaa5c8
RG
1260 return true;
1261}
1262
4ab01344 1263boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp)
0beaa5c8
RG
1264{
1265 if (cs.dnscryptCtx) {
7129b5c4 1266#ifdef HAVE_DNSCRYPT
0beaa5c8
RG
1267 vector<uint8_t> response;
1268 uint16_t decryptedQueryLen = 0;
2b3eefc3 1269
43234e76 1270 dnsCryptQuery = std::make_shared<DNSCryptQuery>(cs.dnscryptCtx);
0beaa5c8 1271
4ab01344 1272 bool decrypted = handleDNSCryptQuery(const_cast<char*>(query), len, dnsCryptQuery, &decryptedQueryLen, tcp, now, response);
0beaa5c8
RG
1273
1274 if (!decrypted) {
1275 if (response.size() > 0) {
4ab01344 1276 return response;
2b3eefc3 1277 }
4ab01344 1278 throw std::runtime_error("Unable to decrypt DNSCrypt query, dropping.");
0beaa5c8 1279 }
2b3eefc3 1280
0beaa5c8 1281 len = decryptedQueryLen;
7129b5c4 1282#endif /* HAVE_DNSCRYPT */
0beaa5c8 1283 }
4ab01344 1284 return boost::none;
0beaa5c8 1285}
2b3eefc3 1286
0beaa5c8
RG
1287bool checkQueryHeaders(const struct dnsheader* dh)
1288{
1289 if (dh->qr) { // don't respond to responses
cb167afd 1290 ++g_stats.nonCompliantQueries;
0beaa5c8
RG
1291 return false;
1292 }
2b3eefc3 1293
0beaa5c8 1294 if (dh->qdcount == 0) {
cb167afd 1295 ++g_stats.emptyQueries;
0beaa5c8
RG
1296 return false;
1297 }
0ba5eecf 1298
0beaa5c8 1299 if (dh->rd) {
cb167afd 1300 ++g_stats.rdQueries;
0beaa5c8 1301 }
e91084ce 1302
0beaa5c8
RG
1303 return true;
1304}
963bef8d 1305
0beaa5c8
RG
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* Prepares `outMsg` so that `response` (`responseLen` bytes) is sent to
   `remote` by a later sendmmsg() call. `iov` is pointed at the response.
   When `dest` holds an address (family != 0) it is attached as source address
   through `cbuf`; otherwise no ancillary data is sent. `cs` is not used. */
static void queueResponse(const ClientState& cs, const char* response, uint16_t responseLen, const ComboAddress& dest, const ComboAddress& remote, struct mmsghdr& outMsg, struct iovec* iov, char* cbuf)
{
  outMsg.msg_len = 0;
  fillMSGHdr(&outMsg.msg_hdr, iov, nullptr, 0, const_cast<char*>(response), responseLen, const_cast<ComboAddress*>(&remote));

  const bool haveSourceAddress = (dest.sin4.sin_family != 0);
  if (haveSourceAddress) {
    addCMsgSrcAddr(&outMsg.msg_hdr, cbuf, &dest, 0);
  }
  else {
    outMsg.msg_hdr.msg_control = nullptr;
  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
43eeadc1 1320
3e425868
RG
1321/* self-generated responses or cache hits */
1322static bool prepareOutgoingResponse(LocalHolders& holders, ClientState& cs, DNSQuestion& dq, bool cacheHit)
54aaa82b 1323{
3e425868 1324 DNSResponse dr(dq.qname, dq.qtype, dq.qclass, dq.consumed, dq.local, dq.remote, reinterpret_cast<dnsheader*>(dq.dh), dq.size, dq.len, dq.tcp, dq.queryTime);
4ab01344 1325
54aaa82b 1326#ifdef HAVE_PROTOBUF
1327 dr.uniqueId = dq.uniqueId;
1328#endif
1329 dr.qTag = dq.qTag;
3e425868 1330 dr.delayMsec = dq.delayMsec;
54aaa82b 1331
3e425868
RG
1332 if (!applyRulesToResponse(cacheHit ? holders.cacheHitRespRulactions : holders.selfAnsweredRespRulactions, dr)) {
1333 return false;
54aaa82b 1334 }
1335
3e425868
RG
1336 /* in case a rule changed it */
1337 dq.delayMsec = dr.delayMsec;
1338
54aaa82b 1339#ifdef HAVE_DNSCRYPT
7129b5c4 1340 if (!cs.muted) {
3e425868
RG
1341 if (!encryptResponse(reinterpret_cast<char*>(dq.dh), &dq.len, dq.size, dq.tcp, dq.dnsCryptQuery, nullptr, nullptr)) {
1342 return false;
54aaa82b 1343 }
54aaa82b 1344 }
7129b5c4 1345#endif /* HAVE_DNSCRYPT */
54aaa82b 1346
389d903a
RG
1347 if (cacheHit) {
1348 ++g_stats.cacheHits;
1349 }
3e425868 1350
61d10a4d
MH
1351 switch (dr.dh->rcode) {
1352 case RCode::NXDomain:
1353 ++g_stats.frontendNXDomain;
1354 break;
1355 case RCode::ServFail:
1356 ++g_stats.frontendServFail;
1357 break;
1358 case RCode::NoError:
1359 ++g_stats.frontendNoError;
1360 break;
1361 }
3e425868 1362
54aaa82b 1363 doLatencyStats(0); // we're not going to measure this
3e425868 1364 return true;
54aaa82b 1365}
1366
3e425868 1367ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend)
0beaa5c8 1368{
4ab01344 1369 const uint16_t queryId = ntohs(dq.dh->id);
2b3eefc3 1370
0beaa5c8 1371 try {
43234e76
RG
1372 /* we need an accurate ("real") value for the response and
1373 to store into the IDS, but not for insertion into the
1374 rings for example */
43234e76
RG
1375 struct timespec now;
1376 gettime(&now);
2efd427d 1377
0beaa5c8 1378 string poolname;
0beaa5c8 1379
4ab01344 1380 if (!applyRulesToQuery(holders, dq, poolname, now)) {
3e425868 1381 return ProcessQueryResult::Drop;
0beaa5c8 1382 }
b63add03 1383
0beaa5c8 1384 if(dq.dh->qr) { // something turned it into a response
4ab01344 1385 fixUpQueryTurnedResponse(dq, dq.origFlags);
0beaa5c8 1386
3e425868
RG
1387 if (!prepareOutgoingResponse(holders, cs, dq, false)) {
1388 return ProcessQueryResult::Drop;
22b2b326 1389 }
5f504638 1390
3e425868
RG
1391 ++g_stats.selfAnswered;
1392 return ProcessQueryResult::SendAnswer;
0beaa5c8
RG
1393 }
1394
0beaa5c8 1395 std::shared_ptr<ServerPool> serverPool = getPool(*holders.pools, poolname);
4ab01344 1396 dq.packetCache = serverPool->packetCache;
a1b1a29d 1397 auto policy = *(holders.policy);
0beaa5c8 1398 if (serverPool->policy != nullptr) {
a1b1a29d 1399 policy = *(serverPool->policy);
0beaa5c8 1400 }
a1b1a29d
RG
1401 auto servers = serverPool->getServers();
1402 if (policy.isLua) {
0beaa5c8 1403 std::lock_guard<std::mutex> lock(g_luamutex);
3e425868 1404 selectedBackend = policy.policy(servers, &dq);
a1b1a29d
RG
1405 }
1406 else {
3e425868 1407 selectedBackend = policy.policy(servers, &dq);
0beaa5c8 1408 }
228e4fe8 1409
9837850d 1410 uint16_t cachedResponseSize = dq.size;
3e425868 1411 uint32_t allowExpired = selectedBackend ? 0 : g_staleCacheEntriesTTL;
9837850d 1412
4ab01344
RG
1413 if (dq.packetCache && !dq.skipCache) {
1414 dq.dnssecOK = (getEDNSZ(dq) & EDNS_HEADER_FLAG_DO);
1ef18cab 1415 }
1416
3e425868 1417 if (dq.useECS && ((selectedBackend && selectedBackend->useECS) || (!selectedBackend && serverPool->getECS()))) {
389d903a 1418 // we special case our cache in case a downstream explicitly gave us a universally valid response with a 0 scope
3e425868 1419 if (dq.packetCache && !dq.skipCache && (!selectedBackend || !selectedBackend->disableZeroScope) && dq.packetCache->isECSParsingEnabled()) {
4ab01344 1420 if (dq.packetCache->get(dq, dq.consumed, dq.dh->id, reinterpret_cast<char*>(dq.dh), &cachedResponseSize, &dq.cacheKeyNoECS, dq.subnet, dq.dnssecOK, allowExpired)) {
3e425868
RG
1421 dq.len = cachedResponseSize;
1422
1423 if (!prepareOutgoingResponse(holders, cs, dq, true)) {
1424 return ProcessQueryResult::Drop;
1425 }
1426
1427 return ProcessQueryResult::SendAnswer;
389d903a
RG
1428 }
1429
4ab01344 1430 if (!dq.subnet) {
389d903a 1431 /* there was no existing ECS on the query, enable the zero-scope feature */
4ab01344 1432 dq.useZeroScope = true;
389d903a 1433 }
9837850d 1434 }
389d903a 1435
4ab01344
RG
1436 if (!handleEDNSClientSubnet(dq, &(dq.ednsAdded), &(dq.ecsAdded), g_preserveTrailingData)) {
1437 vinfolog("Dropping query from %s because we couldn't insert the ECS value", dq.remote->toStringWithPort());
3e425868 1438 return ProcessQueryResult::Drop;
886e2cf2 1439 }
0beaa5c8 1440 }
886e2cf2 1441
4ab01344
RG
1442 if (dq.packetCache && !dq.skipCache) {
1443 if (dq.packetCache->get(dq, dq.consumed, dq.dh->id, reinterpret_cast<char*>(dq.dh), &cachedResponseSize, &dq.cacheKey, dq.subnet, dq.dnssecOK, allowExpired)) {
3e425868
RG
1444 dq.len = cachedResponseSize;
1445
1446 if (!prepareOutgoingResponse(holders, cs, dq, true)) {
1447 return ProcessQueryResult::Drop;
1448 }
1449
1450 return ProcessQueryResult::SendAnswer;
886e2cf2 1451 }
cb167afd 1452 ++g_stats.cacheMisses;
0beaa5c8 1453 }
886e2cf2 1454
3e425868 1455 if(!selectedBackend) {
cb167afd 1456 ++g_stats.noPolicy;
26a3cdb7 1457
3e425868
RG
1458 vinfolog("%s query for %s|%s from %s, no policy applied", g_servFailOnNoPolicy ? "ServFailed" : "Dropped", dq.qname->toString(), QType(dq.qtype).getName(), dq.remote->toStringWithPort());
1459 if (g_servFailOnNoPolicy) {
4ab01344 1460 restoreFlags(dq.dh, dq.origFlags);
26a3cdb7 1461
0beaa5c8
RG
1462 dq.dh->rcode = RCode::ServFail;
1463 dq.dh->qr = true;
26a3cdb7 1464
3e425868
RG
1465 if (!prepareOutgoingResponse(holders, cs, dq, false)) {
1466 return ProcessQueryResult::Drop;
1467 }
be6c318f 1468 // no response-only statistics counter to update.
3e425868 1469 return ProcessQueryResult::SendAnswer;
1ea747c0 1470 }
3e425868
RG
1471
1472 return ProcessQueryResult::Drop;
0beaa5c8 1473 }
1ea747c0 1474
3e425868
RG
1475 if (dq.addXPF && selectedBackend->xpfRRCode != 0) {
1476 addXPF(dq, selectedBackend->xpfRRCode, g_preserveTrailingData);
5cc8371b
RG
1477 }
1478
3e425868
RG
1479 selectedBackend->queries++;
1480 return ProcessQueryResult::PassToBackend;
4ab01344
RG
1481 }
1482 catch(const std::exception& e){
1483 vinfolog("Got an error while parsing a %s query from %s, id %d: %s", (dq.tcp ? "TCP" : "UDP"), dq.remote->toStringWithPort(), queryId, e.what());
4ab01344 1484 }
3e425868 1485 return ProcessQueryResult::Drop;
4ab01344
RG
1486}
1487
1488static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest, char* query, uint16_t len, size_t queryBufferSize, struct mmsghdr* responsesVect, unsigned int* queuedResponses, struct iovec* respIOV, char* respCBuf)
1489{
1490 assert(responsesVect == nullptr || (queuedResponses != nullptr && respIOV != nullptr && respCBuf != nullptr));
1491 uint16_t queryId = 0;
1492
1493 try {
1494 if (!isUDPQueryAcceptable(cs, holders, msgh, remote, dest)) {
1495 return;
1496 }
1497
1498 /* we need an accurate ("real") value for the response and
1499 to store into the IDS, but not for insertion into the
1500 rings for example */
1501 struct timespec queryRealTime;
4ab01344
RG
1502 gettime(&queryRealTime, true);
1503
1504 std::shared_ptr<DNSCryptQuery> dnsCryptQuery = nullptr;
4ab01344
RG
1505 auto dnsCryptResponse = checkDNSCryptQuery(cs, query, len, dnsCryptQuery, queryRealTime.tv_sec, false);
1506 if (dnsCryptResponse) {
1507 sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(dnsCryptResponse->data()), static_cast<uint16_t>(dnsCryptResponse->size()), 0, dest, remote);
1508 return;
1509 }
4ab01344
RG
1510
1511 struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(query);
1512 queryId = ntohs(dh->id);
1513
1514 if (!checkQueryHeaders(dh)) {
1515 return;
1516 }
1517
1518 uint16_t qtype, qclass;
1519 unsigned int consumed = 0;
1520 DNSName qname(query, len, sizeof(dnsheader), false, &qtype, &qclass, &consumed);
1521 DNSQuestion dq(&qname, qtype, qclass, consumed, dest.sin4.sin_family != 0 ? &dest : &cs.local, &remote, dh, queryBufferSize, len, false, &queryRealTime);
1522 dq.dnsCryptQuery = std::move(dnsCryptQuery);
3e425868
RG
1523 std::shared_ptr<DownstreamState> ss{nullptr};
1524 auto result = processQuery(dq, cs, holders, ss);
4ab01344 1525
3e425868
RG
1526 if (result == ProcessQueryResult::Drop) {
1527 return;
1528 }
1529
1530 if (result == ProcessQueryResult::SendAnswer) {
4ab01344 1531#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
3e425868
RG
1532 if (dq.delayMsec == 0 && responsesVect != nullptr) {
1533 queueResponse(cs, reinterpret_cast<char*>(dq.dh), dq.len, *dq.local, *dq.remote, responsesVect[*queuedResponses], respIOV, respCBuf);
4ab01344
RG
1534 (*queuedResponses)++;
1535 return;
1536 }
1537#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
6d192f51
RG
1538 /* we use dest, always, because we don't want to use the listening address to send a response since it could be 0.0.0.0 */
1539 sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(dq.dh), dq.len, dq.delayMsec, dest, *dq.remote);
3e425868
RG
1540 return;
1541 }
4ab01344 1542
3e425868 1543 if (result != ProcessQueryResult::PassToBackend || ss == nullptr) {
4ab01344
RG
1544 return;
1545 }
11e1e08b 1546
0beaa5c8
RG
1547 unsigned int idOffset = (ss->idOffset++) % ss->idStates.size();
1548 IDState* ids = &ss->idStates[idOffset];
1549 ids->age = 0;
c9ba8478 1550
71b86bd8
RG
1551 int oldFD = ids->origFD.exchange(cs.udpFD);
1552 if(oldFD < 0) {
1553 // if we are reusing, no change in outstanding
0beaa5c8 1554 ss->outstanding++;
71b86bd8 1555 }
0beaa5c8
RG
1556 else {
1557 ss->reuseds++;
cb167afd 1558 ++g_stats.downstreamTimeouts;
0beaa5c8 1559 }
0e41337b 1560
0beaa5c8 1561 ids->cs = &cs;
0beaa5c8 1562 ids->origID = dh->id;
d0ae6360 1563 setIDStateFromDNSQuestion(*ids, dq, std::move(qname));
0beaa5c8
RG
1564
1565 /* If we couldn't harvest the real dest addr, still
1566 write down the listening addr since it will be useful
1567 (especially if it's not an 'any' one).
1568 We need to keep track of which one it is since we may
1569 want to use the real but not the listening addr to reply.
1570 */
1571 if (dest.sin4.sin_family != 0) {
1572 ids->origDest = dest;
1573 ids->destHarvested = true;
1574 }
1575 else {
1576 ids->origDest = cs.local;
1577 ids->destHarvested = false;
1578 }
7129b5c4 1579
0beaa5c8 1580 dh->id = idOffset;
ca404e94 1581
38069e7e 1582 int fd = pickBackendSocketForSending(ss);
150105a2 1583 ssize_t ret = udpClientSendRequestToBackend(ss, fd, query, dq.len);
11e1e08b 1584
0beaa5c8
RG
1585 if(ret < 0) {
1586 ss->sendErrors++;
cb167afd 1587 ++g_stats.downstreamSendErrors;
0beaa5c8 1588 }
ca404e94 1589
0beaa5c8
RG
1590 vinfolog("Got query for %s|%s from %s, relayed to %s", ids->qname.toString(), QType(ids->qtype).getName(), remote.toStringWithPort(), ss->getName());
1591 }
1592 catch(const std::exception& e){
1593 vinfolog("Got an error in UDP question thread while parsing a query from %s, id %d: %s", remote.toStringWithPort(), queryId, e.what());
1594 }
1595}
1596
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* Receive loop used when more than one datagram is read per system call.

   Up to g_udpVectorSize datagrams are received at once with recvmmsg(), each
   of them is handed to processUDPQuery(), and the answers that were queued by
   processUDPQuery() are sent in one sendmmsg() call. Never returns. */
static void MultipleMessagesUDPClientThread(ClientState* cs, LocalHolders& holders)
{
  /* per-slot storage: payload, ancillary data, addresses and IO vector */
  struct MMReceiver
  {
    char packet[4096];
    /* used by HarvestDestinationAddress */
    char cbuf[256];
    ComboAddress remote;
    ComboAddress dest;
    struct iovec iov;
  };
  const size_t vectSize = g_udpVectorSize;
  /* the actual buffer is larger because:
     - we may have to add EDNS and/or ECS
     - we use it for self-generated responses (from rule or cache)
     but we only accept incoming payloads up to that size
  */
  static_assert(s_udpIncomingBufferSize <= sizeof(MMReceiver::packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)");

  auto recvData = std::unique_ptr<MMReceiver[]>(new MMReceiver[vectSize]);
  auto msgVec = std::unique_ptr<struct mmsghdr[]>(new struct mmsghdr[vectSize]);
  auto outMsgVec = std::unique_ptr<struct mmsghdr[]>(new struct mmsghdr[vectSize]);

  /* initialize the structures needed to receive our messages */
  for (size_t idx = 0; idx < vectSize; idx++) {
    auto& slot = recvData[idx];
    slot.remote.sin4.sin_family = cs->local.sin4.sin_family;
    fillMSGHdr(&msgVec[idx].msg_hdr, &slot.iov, slot.cbuf, sizeof(slot.cbuf), slot.packet, s_udpIncomingBufferSize, &slot.remote);
  }

  /* go now */
  for (;;) {

    /* reset the IO vector, since it's also used to send the vector of responses
       to avoid having to copy the data around */
    /* NOTE(review): the length is reset to the whole buffer, not to
       s_udpIncomingBufferSize as in the initialisation above - confirm which one is intended. */
    for (size_t idx = 0; idx < vectSize; idx++) {
      auto& slot = recvData[idx];
      slot.iov.iov_base = slot.packet;
      slot.iov.iov_len = sizeof(slot.packet);
    }

    /* block until we have at least one message ready, but return
       as many as possible to save the syscall costs */
    const int msgsGot = recvmmsg(cs->udpFD, msgVec.get(), vectSize, MSG_WAITFORONE | MSG_TRUNC, nullptr);

    if (msgsGot <= 0) {
      vinfolog("Getting UDP messages via recvmmsg() failed with: %s", strerror(errno));
      continue;
    }

    unsigned int msgsToSend = 0;

    /* process the received messages */
    for (int msgIdx = 0; msgIdx < msgsGot; msgIdx++) {
      auto& slot = recvData[msgIdx];
      const struct msghdr* msgh = &msgVec[msgIdx].msg_hdr;
      const unsigned int got = msgVec[msgIdx].msg_len;
      const ComboAddress& remote = slot.remote;

      /* too short to even hold a DNS header */
      if (static_cast<size_t>(got) < sizeof(struct dnsheader)) {
        ++g_stats.nonCompliantQueries;
        continue;
      }

      processUDPQuery(*cs, holders, msgh, remote, slot.dest, slot.packet, static_cast<uint16_t>(got), sizeof(slot.packet), outMsgVec.get(), &msgsToSend, &slot.iov, slot.cbuf);
    }

    /* immediate (not delayed or sent to a backend) responses (mostly from a rule, dynamic block
       or the cache) can be sent in batch too */

    if (msgsToSend > 0 && msgsToSend <= static_cast<unsigned int>(msgsGot)) {
      const int sent = sendmmsg(cs->udpFD, outMsgVec.get(), msgsToSend, 0);

      if (sent < 0 || static_cast<unsigned int>(sent) != msgsToSend) {
        vinfolog("Error sending responses with sendmmsg() (%d on %u): %s", sent, msgsToSend, strerror(errno));
      }
    }
  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
1677
1678// listens to incoming queries, sends out to downstream servers, noting the intended return path
9b73b71c 1679static void udpClientThread(ClientState* cs)
0beaa5c8
RG
1680try
1681{
519f5484 1682 setThreadName("dnsdist/udpClie");
0beaa5c8
RG
1683 LocalHolders holders;
1684
1685#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
1686 if (g_udpVectorSize > 1) {
1687 MultipleMessagesUDPClientThread(cs, holders);
1688
1689 }
1690 else
1691#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
1692 {
1693 char packet[4096];
1694 /* the actual buffer is larger because:
1695 - we may have to add EDNS and/or ECS
1696 - we use it for self-generated responses (from rule or cache)
1697 but we only accept incoming payloads up to that size
1698 */
1699 static_assert(s_udpIncomingBufferSize <= sizeof(packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)");
1700 struct msghdr msgh;
1701 struct iovec iov;
1702 /* used by HarvestDestinationAddress */
1703 char cbuf[256];
1704
1705 ComboAddress remote;
1706 ComboAddress dest;
1707 remote.sin4.sin_family = cs->local.sin4.sin_family;
1708 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), packet, sizeof(packet), &remote);
1709
1710 for(;;) {
1711 ssize_t got = recvmsg(cs->udpFD, &msgh, 0);
1712
1713 if (got < 0 || static_cast<size_t>(got) < sizeof(struct dnsheader)) {
cb167afd 1714 ++g_stats.nonCompliantQueries;
0beaa5c8
RG
1715 continue;
1716 }
1717
1718 processUDPQuery(*cs, holders, &msgh, remote, dest, packet, static_cast<uint16_t>(got), s_udpIncomingBufferSize, nullptr, nullptr, nullptr, nullptr);
1719 }
1720 }
24d5cb00 1721}
2b3eefc3 1722catch(const std::exception &e)
a4652d55 1723{
1724 errlog("UDP client thread died because of exception: %s", e.what());
a4652d55 1725}
2b3eefc3 1726catch(const PDNSException &e)
a4652d55 1727{
1728 errlog("UDP client thread died because of PowerDNS exception: %s", e.reason);
a4652d55 1729}
1730catch(...)
1731{
1732 errlog("UDP client thread died because of an exception: %s", "unknown");
a4652d55 1733}
24d5cb00 1734
/* Returns a random value in [0, 65535], used as the ID of the
   health check queries built by upCheck().
   Uses libsodium's randombytes_random() when available and falls
   back to random() otherwise. */
uint16_t getRandomDNSID()
{
#ifdef HAVE_LIBSODIUM
  return static_cast<uint16_t>(randombytes_random() % 65536);
#else
  return static_cast<uint16_t>(random() % 65536);
#endif
}
1743
4ab01344 1744static bool upCheck(const shared_ptr<DownstreamState>& ds)
773470ca 1745try
1746{
4ab01344
RG
1747 DNSName checkName = ds->checkName;
1748 uint16_t checkType = ds->checkType.getCode();
1749 uint16_t checkClass = ds->checkClass;
98650fde
RG
1750 dnsheader checkHeader;
1751 memset(&checkHeader, 0, sizeof(checkHeader));
1752
1753 checkHeader.qdcount = htons(1);
555970c9 1754 checkHeader.id = getRandomDNSID();
98650fde
RG
1755
1756 checkHeader.rd = true;
4ab01344 1757 if (ds->setCD) {
98650fde
RG
1758 checkHeader.cd = true;
1759 }
1760
4ab01344 1761 if (ds->checkFunction) {
98650fde 1762 std::lock_guard<std::mutex> lock(g_luamutex);
4ab01344 1763 auto ret = ds->checkFunction(checkName, checkType, checkClass, &checkHeader);
98650fde
RG
1764 checkName = std::get<0>(ret);
1765 checkType = std::get<1>(ret);
1766 checkClass = std::get<2>(ret);
21830638 1767 }
773470ca 1768
98650fde
RG
1769 vector<uint8_t> packet;
1770 DNSPacketWriter dpw(packet, checkName, checkType, checkClass);
1771 dnsheader * requestHeader = dpw.getHeader();
1772 *requestHeader = checkHeader;
1773
4ab01344 1774 Socket sock(ds->remote.sin4.sin_family, SOCK_DGRAM);
773470ca 1775 sock.setNonBlocking();
4ab01344 1776 if (!IsAnyAddress(ds->sourceAddr)) {
fbe2a2e0 1777 sock.setReuseAddr();
4ab01344 1778 sock.bind(ds->sourceAddr);
fbe2a2e0 1779 }
4ab01344 1780 sock.connect(ds->remote);
3e425868 1781 ssize_t sent = udpClientSendRequestToBackend(ds, sock.getHandle(), reinterpret_cast<char*>(&packet[0]), packet.size(), true);
9e87dcb8
RG
1782 if (sent < 0) {
1783 int ret = errno;
1784 if (g_verboseHealthChecks)
4ab01344 1785 infolog("Error while sending a health check query to backend %s: %d", ds->getNameWithAddr(), ret);
fbe2a2e0 1786 return false;
9e87dcb8 1787 }
fbe2a2e0 1788
4ab01344 1789 int ret = waitForRWData(sock.getHandle(), true, /* ms to seconds */ ds->checkTimeout / 1000, /* remaining ms to us */ (ds->checkTimeout % 1000) * 1000);
9e87dcb8
RG
1790 if(ret < 0 || !ret) { // error, timeout, both are down!
1791 if (ret < 0) {
1792 ret = errno;
1793 if (g_verboseHealthChecks)
4ab01344 1794 infolog("Error while waiting for the health check response from backend %s: %d", ds->getNameWithAddr(), ret);
9e87dcb8
RG
1795 }
1796 else {
1797 if (g_verboseHealthChecks)
4ab01344 1798 infolog("Timeout while waiting for the health check response from backend %s", ds->getNameWithAddr());
9e87dcb8 1799 }
773470ca 1800 return false;
9e87dcb8
RG
1801 }
1802
773470ca 1803 string reply;
a2353842
RG
1804 ComboAddress from;
1805 sock.recvFrom(reply, from);
1806
1807 /* we are using a connected socket but hey.. */
4ab01344 1808 if (from != ds->remote) {
a2353842 1809 if (g_verboseHealthChecks)
4ab01344 1810 infolog("Invalid health check response received from %s, expecting one from %s", from.toStringWithPort(), ds->remote.toStringWithPort());
a2353842
RG
1811 return false;
1812 }
773470ca 1813
98650fde 1814 const dnsheader * responseHeader = reinterpret_cast<const dnsheader *>(reply.c_str());
fc01e0b1 1815
9e87dcb8
RG
1816 if (reply.size() < sizeof(*responseHeader)) {
1817 if (g_verboseHealthChecks)
4ab01344 1818 infolog("Invalid health check response of size %d from backend %s, expecting at least %d", reply.size(), ds->getNameWithAddr(), sizeof(*responseHeader));
fc01e0b1 1819 return false;
9e87dcb8 1820 }
fc01e0b1 1821
9e87dcb8
RG
1822 if (responseHeader->id != requestHeader->id) {
1823 if (g_verboseHealthChecks)
4ab01344 1824 infolog("Invalid health check response id %d from backend %s, expecting %d", responseHeader->id, ds->getNameWithAddr(), requestHeader->id);
fc01e0b1 1825 return false;
9e87dcb8
RG
1826 }
1827
1828 if (!responseHeader->qr) {
1829 if (g_verboseHealthChecks)
4ab01344 1830 infolog("Invalid health check response from backend %s, expecting QR to be set", ds->getNameWithAddr());
fc01e0b1 1831 return false;
9e87dcb8
RG
1832 }
1833
1834 if (responseHeader->rcode == RCode::ServFail) {
1835 if (g_verboseHealthChecks)
4ab01344 1836 infolog("Backend %s responded to health check with ServFail", ds->getNameWithAddr());
fc01e0b1 1837 return false;
9e87dcb8
RG
1838 }
1839
4ab01344 1840 if (ds->mustResolve && (responseHeader->rcode == RCode::NXDomain || responseHeader->rcode == RCode::Refused)) {
9e87dcb8 1841 if (g_verboseHealthChecks)
4ab01344 1842 infolog("Backend %s responded to health check with %s while mustResolve is set", ds->getNameWithAddr(), responseHeader->rcode == RCode::NXDomain ? "NXDomain" : "Refused");
a6e02424 1843 return false;
9e87dcb8 1844 }
fc01e0b1 1845
98650fde
RG
1846 uint16_t receivedType;
1847 uint16_t receivedClass;
1848 DNSName receivedName(reply.c_str(), reply.size(), sizeof(dnsheader), false, &receivedType, &receivedClass);
1849
1850 if (receivedName != checkName || receivedType != checkType || receivedClass != checkClass) {
1851 if (g_verboseHealthChecks)
4ab01344 1852 infolog("Backend %s responded to health check with an invalid qname (%s vs %s), qtype (%s vs %s) or qclass (%d vs %d)", ds->getNameWithAddr(), receivedName.toLogString(), checkName.toLogString(), QType(receivedType).getName(), QType(checkType).getName(), receivedClass, checkClass);
98650fde
RG
1853 return false;
1854 }
1855
773470ca 1856 return true;
1857}
9e87dcb8
RG
1858catch(const std::exception& e)
1859{
1860 if (g_verboseHealthChecks)
4ab01344 1861 infolog("Error checking the health of backend %s: %s", ds->getNameWithAddr(), e.what());
9e87dcb8
RG
1862 return false;
1863}
773470ca 1864catch(...)
1865{
9e87dcb8 1866 if (g_verboseHealthChecks)
4ab01344 1867 infolog("Unknown exception while checking the health of backend %s", ds->getNameWithAddr());
773470ca 1868 return false;
1869}
726ddf60 1870
/* upper bound on the number of TCP client threads */
uint64_t g_maxTCPClientThreads{10};
/* number of one-second maintThread() iterations between two cache cleanings */
std::atomic<uint16_t> g_cacheCleaningDelay{60};
/* maintThread() asks each cache to purge expired entries down to
   (100 - this value) percent of its maximum number of entries */
std::atomic<uint16_t> g_cacheCleaningPercentage{100};
e41f8165 1874
9b73b71c 1875void maintThread()
886e2cf2 1876{
519f5484 1877 setThreadName("dnsdist/main");
886e2cf2
RG
1878 int interval = 1;
1879 size_t counter = 0;
5f3ea719 1880 int32_t secondsToWaitLog = 0;
886e2cf2
RG
1881
1882 for(;;) {
1883 sleep(interval);
1884
069e59db 1885 {
1886 std::lock_guard<std::mutex> lock(g_luamutex);
5f3ea719
PL
1887 auto f = g_lua.readVariable<boost::optional<std::function<void()> > >("maintenance");
1888 if(f) {
1889 try {
1890 (*f)();
1891 secondsToWaitLog = 0;
1892 }
1893 catch(std::exception &e) {
1894 if (secondsToWaitLog <= 0) {
1895 infolog("Error during execution of maintenance function: %s", e.what());
1896 secondsToWaitLog = 61;
1897 }
1898 secondsToWaitLog -= interval;
1899 }
1900 }
069e59db 1901 }
886e2cf2
RG
1902
1903 counter++;
1904 if (counter >= g_cacheCleaningDelay) {
c1b81381
RG
1905 /* keep track, for each cache, of whether we should keep
1906 expired entries */
1907 std::map<std::shared_ptr<DNSDistPacketCache>, bool> caches;
1908
1909 /* gather all caches actually used by at least one pool, and see
1910 if something prevents us from cleaning the expired entries */
a9c2e4ab 1911 auto localPools = g_pools.getLocal();
a9c2e4ab 1912 for (const auto& entry : *localPools) {
c1b81381
RG
1913 auto& pool = entry.second;
1914
1915 auto packetCache = pool->packetCache;
1916 if (!packetCache) {
1917 continue;
1918 }
1919
1920 auto pair = caches.insert({packetCache, false});
1921 auto& iter = pair.first;
1922 /* if we need to keep stale data for this cache (ie, not clear
1923 expired entries when at least one pool using this cache
1924 has all its backends down) */
1925 if (packetCache->keepStaleData() && iter->second == false) {
1926 /* so far all pools had at least one backend up */
1927 if (pool->countServers(true) == 0) {
1928 iter->second = true;
1929 }
886e2cf2
RG
1930 }
1931 }
c1b81381
RG
1932
1933 for (auto pair : caches) {
1934 /* shall we keep expired entries ? */
1935 if (pair.second == true) {
1936 continue;
886e2cf2 1937 }
c1b81381
RG
1938 auto& packetCache = pair.first;
1939 size_t upTo = (packetCache->getMaxEntries()* (100 - g_cacheCleaningPercentage)) / 100;
1940 packetCache->purgeExpired(upTo);
886e2cf2
RG
1941 }
1942 counter = 0;
1943 }
1944
1945 // ponder pruning g_dynblocks of expired entries here
1946 }
886e2cf2
RG
1947}
1948
9b73b71c 1949static void secPollThread()
5d4e1ef8
RG
1950{
1951 setThreadName("dnsdist/secpoll");
1952
1953 for (;;) {
1954 try {
1955 doSecPoll(g_secPollSuffix);
1956 }
1957 catch(...) {
1958 }
1959 sleep(g_secPollInterval);
1960 }
1961}
1962
9b73b71c 1963static void healthChecksThread()
3ae86514 1964{
519f5484 1965 setThreadName("dnsdist/healthC");
77c9bc9a 1966
fcadd56e 1967 int interval = 1;
64e4ebb4 1968
3ae86514 1969 for(;;) {
1970 sleep(interval);
773470ca 1971
ded1985a 1972 if(g_tcpclientthreads->getQueuedCount() > 1 && !g_tcpclientthreads->hasReachedMaxThreads())
a9bf3ec4 1973 g_tcpclientthreads->addTCPClientThread();
3ae86514 1974
a9c2e4ab
RG
1975 auto states = g_dstates.getLocal(); // this points to the actual shared_ptrs!
1976 for(auto& dss : *states) {
7c9bf18d 1977 if(++dss->lastCheck < dss->checkInterval)
1978 continue;
1979 dss->lastCheck = 0;
773470ca 1980 if(dss->availability==DownstreamState::Availability::Auto) {
4ab01344 1981 bool newState=upCheck(dss);
2993d58d 1982 if (newState) {
1b633bec
RG
1983 /* check succeeded */
1984 dss->currentCheckFailures = 0;
1985
1986 if (!dss->upStatus) {
1987 /* we were marked as down */
853faf61
JS
1988 dss->consecutiveSuccessfulChecks++;
1989 if (dss->consecutiveSuccessfulChecks < dss->minRiseSuccesses) {
1b633bec
RG
1990 /* if we need more than one successful check to rise
1991 and we didn't reach the threshold yet,
1992 let's stay down */
1993 newState = false;
1994 }
2993d58d 1995 }
1996 }
1b633bec
RG
1997 else {
1998 /* check failed */
853faf61 1999 dss->consecutiveSuccessfulChecks = 0;
1b633bec
RG
2000
2001 if (dss->upStatus) {
2002 /* we are currently up */
2003 dss->currentCheckFailures++;
2004 if (dss->currentCheckFailures < dss->maxCheckFailures) {
2005 /* we need more than one failure to be marked as down,
2006 and we did not reach the threshold yet, let's stay down */
2007 newState = true;
2008 }
2993d58d 2009 }
2010 }
2011
2012 if(newState != dss->upStatus) {
2013 warnlog("Marking downstream %s as '%s'", dss->getNameWithAddr(), newState ? "up" : "down");
7565f4e6
RG
2014
2015 if (newState && !dss->connected) {
5d7e6765
RG
2016 newState = dss->reconnect();
2017
2018 if (dss->connected && !dss->threadStarted.test_and_set()) {
150105a2 2019 dss->tid = thread(responderThread, dss);
7565f4e6
RG
2020 }
2021 }
2022
2993d58d 2023 dss->upStatus = newState;
2024 dss->currentCheckFailures = 0;
853faf61 2025 dss->consecutiveSuccessfulChecks = 0;
9f4eb5cc
RG
2026 if (g_snmpAgent && g_snmpTrapsEnabled) {
2027 g_snmpAgent->sendBackendStatusChangeTrap(dss);
2028 }
2993d58d 2029 }
773470ca 2030 }
b076b34a 2031
773470ca 2032 auto delta = dss->sw.udiffAndSet()/1000000.0;
2033 dss->queryLoad = 1.0*(dss->queries.load() - dss->prev.queries.load())/delta;
2034 dss->dropRate = 1.0*(dss->reuseds.load() - dss->prev.reuseds.load())/delta;
3c115e0f 2035 dss->prev.queries.store(dss->queries.load());
773470ca 2036 dss->prev.reuseds.store(dss->reuseds.load());
3c115e0f 2037
773470ca 2038 for(IDState& ids : dss->idStates) { // timeouts
71b86bd8
RG
2039 int origFD = ids.origFD;
2040 if(origFD >=0 && ids.age++ > g_udpTimeout) {
51642fe3
RG
2041 /* We set origFD to -1 as soon as possible
2042 to limit the risk of racing with the
2043 responder thread.
2044 The UDP client thread only checks origFD to
2045 know whether outstanding has to be incremented,
2046 so the sooner the better any way since we _will_
2047 decrement it.
2048 */
71b86bd8
RG
2049 if (ids.origFD.exchange(-1) != origFD) {
2050 /* this state has been altered in the meantime,
2051 don't go anywhere near it */
2052 continue;
2053 }
64e4ebb4 2054 ids.age = 0;
3c115e0f 2055 dss->reuseds++;
2056 --dss->outstanding;
cb167afd 2057 ++g_stats.downstreamTimeouts; // this is an 'actively' discovered timeout
c08a5092 2058 vinfolog("Had a downstream timeout from %s (%s) for query for %s|%s from %s",
2059 dss->remote.toStringWithPort(), dss->name,
2060 ids.qname.toString(), QType(ids.qtype).getName(), ids.origRemote.toStringWithPort());
51642fe3 2061
c2fbeb27 2062 struct timespec ts;
85c7ca75 2063 gettime(&ts);
c2fbeb27 2064
2d11d1b2 2065 struct dnsheader fake;
2066 memset(&fake, 0, sizeof(fake));
2067 fake.id = ids.origID;
c2fbeb27 2068
6d31c8b6 2069 g_rings.insertResponse(ts, ids.origRemote, ids.qname, ids.qtype, std::numeric_limits<unsigned int>::max(), 0, fake, dss->remote);
232f0877 2070 }
3ae86514 2071 }
2072 }
3ae86514 2073 }
3ae86514 2074}
2075
c2a42f97 2076static void bindAny(int af, int sock)
2077{
18f8e493 2078 __attribute__((unused)) int one = 1;
c2a42f97 2079
2080#ifdef IP_FREEBIND
2081 if (setsockopt(sock, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) < 0)
2082 warnlog("Warning: IP_FREEBIND setsockopt failed: %s", strerror(errno));
2083#endif
2084
2085#ifdef IP_BINDANY
2086 if (af == AF_INET)
2087 if (setsockopt(sock, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) < 0)
2088 warnlog("Warning: IP_BINDANY setsockopt failed: %s", strerror(errno));
2089#endif
2090#ifdef IPV6_BINDANY
2091 if (af == AF_INET6)
2092 if (setsockopt(sock, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) < 0)
2093 warnlog("Warning: IPV6_BINDANY setsockopt failed: %s", strerror(errno));
2094#endif
2095#ifdef SO_BINDANY
2096 if (setsockopt(sock, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) < 0)
2097 warnlog("Warning: SO_BINDANY setsockopt failed: %s", strerror(errno));
2098#endif
2099}
2100
a36ce055
RG
2101static void dropGroupPrivs(gid_t gid)
2102{
2103 if (gid) {
2104 if (setgid(gid) == 0) {
2105 if (setgroups(0, NULL) < 0) {
2106 warnlog("Warning: Unable to drop supplementary gids: %s", strerror(errno));
2107 }
2108 }
2109 else {
2110 warnlog("Warning: Unable to set group ID to %d: %s", gid, strerror(errno));
2111 }
2112 }
2113}
2114
2115static void dropUserPrivs(uid_t uid)
2116{
2117 if(uid) {
2118 if(setuid(uid) < 0) {
2119 warnlog("Warning: Unable to set user ID to %d: %s", uid, strerror(errno));
2120 }
2121 }
2122}
2123
41408d3a
RG
2124static void checkFileDescriptorsLimits(size_t udpBindsCount, size_t tcpBindsCount)
2125{
2126 /* stdin, stdout, stderr */
2127 size_t requiredFDsCount = 3;
a9c2e4ab 2128 auto backends = g_dstates.getLocal();
cd73ceeb
RG
2129 /* UDP sockets to backends */
2130 size_t backendUDPSocketsCount = 0;
a9c2e4ab 2131 for (const auto& backend : *backends) {
5bdbb83d 2132 backendUDPSocketsCount += backend->sockets.size();
cd73ceeb
RG
2133 }
2134 requiredFDsCount += backendUDPSocketsCount;
2135 /* TCP sockets to backends */
a9c2e4ab 2136 requiredFDsCount += (backends->size() * g_maxTCPClientThreads);
9fcd6adb 2137 /* listening sockets */
41408d3a
RG
2138 requiredFDsCount += udpBindsCount;
2139 requiredFDsCount += tcpBindsCount;
2140 /* max TCP connections currently served */
2141 requiredFDsCount += g_maxTCPClientThreads;
f2e29d04 2142 /* max pipes for communicating between TCP acceptors and client threads */
41408d3a 2143 requiredFDsCount += (g_maxTCPClientThreads * 2);
41408d3a 2144 /* max TCP queued connections */
9fcd6adb 2145 requiredFDsCount += g_maxTCPQueuedConnections;
41408d3a
RG
2146 /* DelayPipe pipe */
2147 requiredFDsCount += 2;
2148 /* syslog socket */
2149 requiredFDsCount++;
2150 /* webserver main socket */
2151 requiredFDsCount++;
2152 /* console main socket */
2153 requiredFDsCount++;
2154 /* carbon export */
2155 requiredFDsCount++;
2156 /* history file */
2157 requiredFDsCount++;
2158 struct rlimit rl;
2159 getrlimit(RLIMIT_NOFILE, &rl);
9fcd6adb 2160 if (rl.rlim_cur <= requiredFDsCount) {
41408d3a
RG
2161 warnlog("Warning, this configuration can use more than %d file descriptors, web server and console connections not included, and the current limit is %d.", std::to_string(requiredFDsCount), std::to_string(rl.rlim_cur));
2162#ifdef HAVE_SYSTEMD
2163 warnlog("You can increase this value by using LimitNOFILE= in the systemd unit file or ulimit.");
2164#else
2165 warnlog("You can increase this value by using ulimit.");
2166#endif
2167 }
2168}
c2a42f97 2169
6e9fd124
RG
2170static void setUpLocalBind(std::unique_ptr<ClientState>& cs)
2171{
2172 /* skip some warnings if there is an identical UDP context */
2173 bool warn = cs->tcp == false || cs->tlsFrontend != nullptr;
2174 int& fd = cs->tcp == false ? cs->udpFD : cs->tcpFD;
2175 (void) warn;
2176
2177 fd = SSocket(cs->local.sin4.sin_family, cs->tcp == false ? SOCK_DGRAM : SOCK_STREAM, 0);
2178
2179 if (cs->tcp) {
2180 SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1);
2181#ifdef TCP_DEFER_ACCEPT
2182 SSetsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, 1);
2183#endif
2184 if (cs->fastOpenQueueSize > 0) {
2185#ifdef TCP_FASTOPEN
2186 SSetsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, cs->fastOpenQueueSize);
2187#else
2188 if (warn) {
2189 warnlog("TCP Fast Open has been configured on local address '%s' but is not supported", cs->local.toStringWithPort());
2190 }
2191#endif
2192 }
2193 }
2194
2195 if(cs->local.sin4.sin_family == AF_INET6) {
2196 SSetsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, 1);
2197 }
2198
2199 bindAny(cs->local.sin4.sin_family, fd);
2200
2201 if(!cs->tcp && IsAnyAddress(cs->local)) {
2202 int one=1;
2203 setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems
2204#ifdef IPV6_RECVPKTINFO
2205 setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
2206#endif
2207 }
2208
2209 if (cs->reuseport) {
2210#ifdef SO_REUSEPORT
2211 SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
2212#else
2213 if (warn) {
2214 /* no need to warn again if configured but support is not available, we already did for UDP */
2215 warnlog("SO_REUSEPORT has been configured on local address '%s' but is not supported", cs->local.toStringWithPort());
2216 }
2217#endif
2218 }
2219
2220 const std::string& itf = cs->interface;
2221 if (!itf.empty()) {
2222#ifdef SO_BINDTODEVICE
2223 int res = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, itf.c_str(), itf.length());
2224 if (res != 0) {
2225 warnlog("Error setting up the interface on local address '%s': %s", cs->local.toStringWithPort(), strerror(errno));
2226 }
2227#else
2228 if (warn) {
2229 warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", cs->local.toStringWithPort());
2230 }
2231#endif
2232 }
2233
2234#ifdef HAVE_EBPF
2235 if (g_defaultBPFFilter) {
2236 cs->attachFilter(g_defaultBPFFilter);
2237 vinfolog("Attaching default BPF Filter to %s frontend %s", (!cs->tcp ? "UDP" : "TCP"), cs->local.toStringWithPort());
2238 }
2239#endif /* HAVE_EBPF */
2240
2241 if (cs->tlsFrontend != nullptr) {
2242 if (!cs->tlsFrontend->setupTLS()) {
2243 errlog("Error while setting up TLS on local address '%s', exiting", cs->local.toStringWithPort());
2244 _exit(EXIT_FAILURE);
2245 }
2246 }
2247
2248 SBind(fd, cs->local);
2249
2250 if (cs->tcp) {
2251 SListen(cs->tcpFD, 64);
2252 if (cs->tlsFrontend != nullptr) {
2253 warnlog("Listening on %s for TLS", cs->local.toStringWithPort());
2254 }
2255 else if (cs->dnscryptCtx != nullptr) {
2256 warnlog("Listening on %s for DNSCrypt", cs->local.toStringWithPort());
2257 }
2258 else {
2259 warnlog("Listening on %s", cs->local.toStringWithPort());
2260 }
2261 }
2262
2263 cs->ready = true;
2264}
2265
/* options gathered from the command line by main() */
struct
{
  /* addresses passed with -l,--local */
  std::vector<std::string> locals;
  /* positional arguments, when not running as a client */
  std::vector<std::string> remotes;
  /* --check-config */
  bool checkConfig{false};
  /* -c,--client */
  bool beClient{false};
  /* --supervised */
  bool beSupervised{false};
  /* -e,--execute */
  std::string command;
  /* -C,--config */
  std::string config;
  /* -u,--uid */
  std::string uid;
  /* -g,--gid */
  std::string gid;
} g_cmdLine;

/* set to true by main() once the frontends list is final, right before the
   listening sockets are set up */
std::atomic<bool> g_configurationDone{false};
520eb5a0 2280
/* Prints the command-line help to the standard output.
   The -k,--setkey option is only listed when dnsdist has been built
   with libsodium. */
static void usage()
{
  std::cout<<std::endl;
  std::cout<<"Syntax: dnsdist [-C,--config file] [-c,--client [IP[:PORT]]]\n";
  std::cout<<"[-e,--execute cmd] [-h,--help] [-l,--local addr]\n";
  std::cout<<"[-v,--verbose] [--check-config] [--version]\n";
  std::cout<<"\n";
  std::cout<<"-a,--acl netmask Add this netmask to the ACL\n";
  std::cout<<"-C,--config file Load configuration from 'file'\n";
  std::cout<<"-c,--client Operate as a client, connect to dnsdist. This reads\n";
  std::cout<<" controlSocket from your configuration file, but also\n";
  std::cout<<" accepts an IP:PORT argument\n";
#ifdef HAVE_LIBSODIUM
  std::cout<<"-k,--setkey KEY Use KEY for encrypted communication to dnsdist. This\n";
  std::cout<<" is similar to setting setKey in the configuration file.\n";
  std::cout<<" NOTE: this will leak this key in your shell's history\n";
  std::cout<<" and in the systems running process list.\n";
#endif
  std::cout<<"--check-config Validate the configuration file and exit. The exit-code\n";
  std::cout<<" reflects the validation, 0 is OK, 1 means an error.\n";
  std::cout<<" Any errors are printed as well.\n";
  std::cout<<"-e,--execute cmd Connect to dnsdist and execute 'cmd'\n";
  std::cout<<"-g,--gid gid Change the process group ID after binding sockets\n";
  std::cout<<"-h,--help Display this helpful message\n";
  std::cout<<"-l,--local address Listen on this local address\n";
  std::cout<<"--supervised Don't open a console, I'm supervised\n";
  std::cout<<" (use with e.g. systemd and daemontools)\n";
  std::cout<<"--disable-syslog Don't log to syslog, only to stdout\n";
  std::cout<<" (use with e.g. systemd)\n";
  std::cout<<"-u,--uid uid Change the process user ID after binding sockets\n";
  std::cout<<"-v,--verbose Enable verbose mode\n";
  std::cout<<"-V,--version Show dnsdist version information and exit\n";
}
2314
24d5cb00 2315int main(int argc, char** argv)
2316try
2317{
41408d3a
RG
2318 size_t udpBindsCount = 0;
2319 size_t tcpBindsCount = 0;
94721140 2320 rl_attempted_completion_function = my_completion;
2321 rl_completion_append_character = 0;
2322
726ddf60 2323 signal(SIGPIPE, SIG_IGN);
6d01c80c 2324 signal(SIGCHLD, SIG_IGN);
0ca6a67f 2325 openlog("dnsdist", LOG_PID|LOG_NDELAY, LOG_DAEMON);
6d01c80c 2326
0b62ec78 2327#ifdef HAVE_LIBSODIUM
6d01c80c 2328 if (sodium_init() == -1) {
2329 cerr<<"Unable to initialize crypto library"<<endl;
2330 exit(EXIT_FAILURE);
2331 }
7691e7df 2332 g_hashperturb=randombytes_uniform(0xffffffff);
2333 srandom(randombytes_uniform(0xffffffff));
2334#else
2335 {
2336 struct timeval tv;
2337 gettimeofday(&tv, 0);
2338 srandom(tv.tv_sec ^ tv.tv_usec ^ getpid());
2339 g_hashperturb=random();
2340 }
2341
0b62ec78 2342#endif
094b6aff 2343 ComboAddress clientAddress = ComboAddress();
11058bd6 2344 g_cmdLine.config=SYSCONFDIR "/dnsdist.conf";
359bdba5 2345 struct option longopts[]={
8f2d5ec3 2346 {"acl", required_argument, 0, 'a'},
359bdba5
CH
2347 {"check-config", no_argument, 0, 1},
2348 {"client", no_argument, 0, 'c'},
7cc68f53 2349 {"config", required_argument, 0, 'C'},
359bdba5 2350 {"disable-syslog", no_argument, 0, 2},
7cc68f53 2351 {"execute", required_argument, 0, 'e'},
359bdba5
CH
2352 {"gid", required_argument, 0, 'g'},
2353 {"help", no_argument, 0, 'h'},
2354 {"local", required_argument, 0, 'l'},
359bdba5 2355 {"setkey", required_argument, 0, 'k'},
359bdba5
CH
2356 {"supervised", no_argument, 0, 3},
2357 {"uid", required_argument, 0, 'u'},
2358 {"verbose", no_argument, 0, 'v'},
2359 {"version", no_argument, 0, 'V'},
2360 {0,0,0,0}
7cc68f53 2361 };
2362 int longindex=0;
8f2d5ec3 2363 string optstring;
7cc68f53 2364 for(;;) {
359bdba5 2365 int c=getopt_long(argc, argv, "a:cC:e:g:hk:l:u:vV", longopts, &longindex);
7cc68f53 2366 if(c==-1)
2367 break;
2368 switch(c) {
5efcfa63
PL
2369 case 1:
2370 g_cmdLine.checkConfig=true;
2371 break;
bbfaaa6f
PL
2372 case 2:
2373 g_syslog=false;
2374 break;
b7165327
CH
2375 case 3:
2376 g_cmdLine.beSupervised=true;
2377 break;
7cc68f53 2378 case 'C':
2379 g_cmdLine.config=optarg;
2380 break;
2381 case 'c':
2382 g_cmdLine.beClient=true;
2383 break;
7cc68f53 2384 case 'e':
2385 g_cmdLine.command=optarg;
2386 break;
a36ce055
RG
2387 case 'g':
2388 g_cmdLine.gid=optarg;
2389 break;
7cc68f53 2390 case 'h':
6306c282 2391 cout<<"dnsdist "<<VERSION<<endl;
7d3ee2bb 2392 usage();
7cc68f53 2393 cout<<"\n";
2394 exit(EXIT_SUCCESS);
2395 break;
8f2d5ec3 2396 case 'a':
2397 optstring=optarg;
2398 g_ACL.modify([optstring](NetmaskGroup& nmg) { nmg.addMask(optstring); });
2399 break;
ddb14ec9 2400 case 'k':
b4b5edbd 2401#ifdef HAVE_LIBSODIUM
b5521206 2402 if (B64Decode(string(optarg), g_consoleKey) < 0) {
ddb14ec9
PL
2403 cerr<<"Unable to decode key '"<<optarg<<"'."<<endl;
2404 exit(EXIT_FAILURE);
2405 }
b4b5edbd
CH
2406#else
2407 cerr<<"dnsdist has been built without libsodium, -k/--setkey is unsupported."<<endl;
2408 exit(EXIT_FAILURE);
ddb14ec9 2409#endif
b4b5edbd 2410 break;
7cc68f53 2411 case 'l':
6a363878 2412 g_cmdLine.locals.push_back(trim_copy(string(optarg)));
7cc68f53 2413 break;
a36ce055
RG
2414 case 'u':
2415 g_cmdLine.uid=optarg;
2416 break;
7cc68f53 2417 case 'v':
2418 g_verbose=true;
2419 break;
6306c282 2420 case 'V':
d4d796e5
PD
2421#ifdef LUAJIT_VERSION
2422 cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<" ["<<LUAJIT_VERSION<<"])"<<endl;
2423#else
2424 cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<")"<<endl;
2425#endif
70829d97 2426 cout<<"Enabled features: ";
a227f47d
RG
2427#ifdef HAVE_DNS_OVER_TLS
2428 cout<<"dns-over-tls(";
2429#ifdef HAVE_GNUTLS
3909bf10
CH
2430 cout<<"gnutls";
2431 #ifdef HAVE_LIBSSL
2432 cout<<" ";
2433 #endif
a227f47d
RG
2434#endif
2435#ifdef HAVE_LIBSSL
2436 cout<<"openssl";
2437#endif
2438 cout<<") ";
2439#endif
70829d97
PL
2440#ifdef HAVE_DNSCRYPT
2441 cout<<"dnscrypt ";
2442#endif
0beaa5c8
RG
2443#ifdef HAVE_EBPF
2444 cout<<"ebpf ";
2445#endif
82a91ddf
CH
2446#ifdef HAVE_FSTRM
2447 cout<<"fstrm ";
2448#endif
f4b1f1fd
RG
2449#ifdef HAVE_LIBCRYPTO
2450 cout<<"ipcipher ";
2451#endif
3909bf10
CH
2452#ifdef HAVE_LIBSODIUM
2453 cout<<"libsodium ";
2454#endif
70829d97
PL
2455#ifdef HAVE_PROTOBUF
2456 cout<<"protobuf ";
2457#endif
2458#ifdef HAVE_RE2
2459 cout<<"re2 ";
2460#endif
0beaa5c8
RG
2461#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
2462 cout<<"recvmmsg/sendmmsg ";
2463#endif
2464#ifdef HAVE_NET_SNMP
2465 cout<<"snmp ";
2466#endif
70829d97
PL
2467#ifdef HAVE_SYSTEMD
2468 cout<<"systemd";
2469#endif
2470 cout<<endl;
6306c282
PL
2471 exit(EXIT_SUCCESS);
2472 break;
7d3ee2bb
PL
2473 case '?':
2474 //getopt_long printed an error message.
2475 usage();
2476 exit(EXIT_FAILURE);
2477 break;
7cc68f53 2478 }
24d5cb00 2479 }
6ab65223 2480
7cc68f53 2481 argc-=optind;
2482 argv+=optind;
2483 for(auto p = argv; *p; ++p) {
094b6aff
PL
2484 if(g_cmdLine.beClient) {
2485 clientAddress = ComboAddress(*p, 5199);
2486 } else {
2487 g_cmdLine.remotes.push_back(*p);
2488 }
7cc68f53 2489 }
2490
a1b1a29d 2491 ServerPolicy leastOutstandingPol{"leastOutstanding", leastOutstanding, false};
cd29dcb1 2492
e5a14b2b 2493 g_policy.setState(leastOutstandingPol);
7cc68f53 2494 if(g_cmdLine.beClient || !g_cmdLine.command.empty()) {
2495 setupLua(true, g_cmdLine.config);
094b6aff
PL
2496 if (clientAddress != ComboAddress())
2497 g_serverControl = clientAddress;
7cc68f53 2498 doClient(g_serverControl, g_cmdLine.command);
e16fd59c 2499 _exit(EXIT_SUCCESS);
6d01c80c 2500 }
2e72cc0e 2501
8f133915 2502 auto acl = g_ACL.getCopy();
8f2d5ec3 2503 if(acl.empty()) {
2504 for(auto& addr : {"127.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "169.254.0.0/16", "192.168.0.0/16", "172.16.0.0/12", "::1/128", "fc00::/7", "fe80::/10"})
2505 acl.addMask(addr);
2506 g_ACL.setState(acl);
2507 }
8f133915 2508
b5521206 2509 auto consoleACL = g_consoleACL.getCopy();
5ceea33e
RG
2510 for (const auto& mask : { "127.0.0.1/8", "::1/128" }) {
2511 consoleACL.addMask(mask);
2512 }
b5521206
RG
2513 g_consoleACL.setState(consoleACL);
2514
5efcfa63
PL
2515 if (g_cmdLine.checkConfig) {
2516 setupLua(true, g_cmdLine.config);
2517 // No exception was thrown
2518 infolog("Configuration '%s' OK!", g_cmdLine.config);
d8c19b98 2519 _exit(EXIT_SUCCESS);
5efcfa63
PL
2520 }
2521
7cc68f53 2522 auto todo=setupLua(false, g_cmdLine.config);
2e72cc0e 2523
636cc544
CHB
2524 auto localPools = g_pools.getCopy();
2525 {
2526 bool precompute = false;
2527 if (g_policy.getLocal()->name == "chashed") {
2528 precompute = true;
2529 } else {
2530 for (const auto& entry: localPools) {
2531 if (entry.second->policy != nullptr && entry.second->policy->name == "chashed") {
2532 precompute = true;
2533 break ;
2534 }
2535 }
2536 }
2537 if (precompute) {
2538 vinfolog("Pre-computing hashes for consistent hash load-balancing policy");
2539 // pre compute hashes
2540 auto backends = g_dstates.getLocal();
2541 for (auto& backend: *backends) {
2542 backend->hash();
2543 }
d58e616a
CHB
2544 }
2545 }
2546
6e9fd124
RG
2547 if (!g_cmdLine.locals.empty()) {
2548 for (auto it = g_frontends.begin(); it != g_frontends.end(); ) {
2549 /* TLS and DNSCrypt frontends are separate */
2550 if ((*it)->tlsFrontend == nullptr && (*it)->dnscryptCtx == nullptr) {
2551 it = g_frontends.erase(it);
9f67b883 2552 }
6e9fd124
RG
2553 else {
2554 ++it;
9f67b883 2555 }
9f67b883
RG
2556 }
2557
6e9fd124
RG
2558 for(const auto& loc : g_cmdLine.locals) {
2559 /* UDP */
2560 g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress(loc, 53), false, false, 0, "", {})));
2561 /* TCP */
2562 g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress(loc, 53), true, false, 0, "", {})));
87b515ed 2563 }
a36ce055
RG
2564 }
2565
6e9fd124
RG
2566 if (g_frontends.empty()) {
2567 /* UDP */
2568 g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress("127.0.0.1", 53), false, false, 0, "", {})));
2569 /* TCP */
2570 g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress("127.0.0.1", 53), true, false, 0, "", {})));
11e1e08b 2571 }
11e1e08b 2572
6e9fd124 2573 g_configurationDone = true;
a227f47d 2574
6e9fd124
RG
2575 for(auto& frontend : g_frontends) {
2576 setUpLocalBind(frontend);
a227f47d 2577
6e9fd124
RG
2578 if (frontend->tcp == false) {
2579 ++udpBindsCount;
a227f47d
RG
2580 }
2581 else {
6e9fd124 2582 ++tcpBindsCount;
a227f47d
RG
2583 }
2584 }
2585
b82a127e 2586 warnlog("dnsdist %s comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it according to the terms of the GPL version 2", VERSION);
9c9b4998 2587
b82a127e
RG
2588 vector<string> vec;
2589 std::string acls;
2590 g_ACL.getLocal()->toStringVector(&vec);
2591 for(const auto& s : vec) {
2592 if (!acls.empty())
2593 acls += ", ";
2594 acls += s;
b076b34a 2595 }
b82a127e 2596 infolog("ACL allowing queries from: %s", acls.c_str());
b5521206
RG
2597 vec.clear();
2598 acls.clear();
2599 g_consoleACL.getLocal()->toStringVector(&vec);
2600 for (const auto& entry : vec) {
2601 if (!acls.empty()) {
2602 acls += ", ";
2603 }
2604 acls += entry;
2605 }
2606 infolog("Console ACL allowing connections from: %s", acls.c_str());
6d01c80c 2607
9c9b4998
RG
2608#ifdef HAVE_LIBSODIUM
2609 if (g_consoleEnabled && g_consoleKey.empty()) {
2610 warnlog("Warning, the console has been enabled via 'controlSocket()' but no key has been set with 'setKey()' so all connections will fail until a key has been set");
2611 }
2612#endif
2613
aac59883
RG
2614 uid_t newgid=0;
2615 gid_t newuid=0;
2616
2617 if(!g_cmdLine.gid.empty())
2618 newgid = strToGID(g_cmdLine.gid.c_str());
2619
2620 if(!g_cmdLine.uid.empty())
2621 newuid = strToUID(g_cmdLine.uid.c_str());
2622
2623 dropGroupPrivs(newgid);
2624 dropUserPrivs(newuid);
fdc3ea42
RG
2625 try {
2626 /* we might still have capabilities remaining,
2627 for example if we have been started as root
2628 without --uid or --gid (please don't do that)
2629 or as an unprivileged user with ambient
2630 capabilities like CAP_NET_BIND_SERVICE.
2631 */
2632 dropCapabilities();
2633 }
2634 catch(const std::exception& e) {
2635 warnlog("%s", e.what());
2636 }
aac59883 2637
a36ce055
RG
2638 /* this needs to be done _after_ dropping privileges */
2639 g_delay = new DelayPipe<DelayedPacket>();
2640
9f4eb5cc
RG
2641 if (g_snmpAgent) {
2642 g_snmpAgent->run();
2643 }
2644
1f7646c2 2645 g_tcpclientthreads = std::unique_ptr<TCPClientCollection>(new TCPClientCollection(g_maxTCPClientThreads, g_useTCPSinglePipe));
a9bf3ec4 2646
2e72cc0e 2647 for(auto& t : todo)
2648 t();
2649
636cc544 2650 localPools = g_pools.getCopy();
8f4f5186
RG
2651 /* create the default pool no matter what */
2652 createPoolIfNotExists(localPools, "");
7cc68f53 2653 if(g_cmdLine.remotes.size()) {
2654 for(const auto& address : g_cmdLine.remotes) {
c9262563 2655 auto ret=std::make_shared<DownstreamState>(ComboAddress(address, 53));
886e2cf2 2656 addServerToPool(localPools, "", ret);
5d7e6765 2657 if (ret->connected && !ret->threadStarted.test_and_set()) {
2717a92f 2658 ret->tid = thread(responderThread, ret);
7565f4e6 2659 }
ecbe9133 2660 g_dstates.modify([ret](servers_t& servers) { servers.push_back(ret); });
6d01c80c 2661 }
2662 }
886e2cf2 2663 g_pools.setState(localPools);
6d01c80c 2664
a9c2e4ab 2665 if(g_dstates.getLocal()->empty()) {
e73ec7d3 2666 errlog("No downstream servers defined: all packets will get dropped");
2667 // you might define them later, but you need to know
2668 }
2669
41408d3a
RG
2670 checkFileDescriptorsLimits(udpBindsCount, tcpBindsCount);
2671
e5a14b2b 2672 for(auto& dss : g_dstates.getCopy()) { // it is a copy, but the internal shared_ptrs are the real deal
773470ca 2673 if(dss->availability==DownstreamState::Availability::Auto) {
4ab01344 2674 bool newState=upCheck(dss);
a7940c06 2675 warnlog("Marking downstream %s as '%s'", dss->getNameWithAddr(), newState ? "up" : "down");
773470ca 2676 dss->upStatus = newState;
2677 }
2678 }
b076b34a 2679
6e9fd124 2680 for(auto& cs : g_frontends) {
a36ce055 2681 if (cs->udpFD >= 0) {
6e9fd124 2682 thread t1(udpClientThread, cs.get());
f0e4dcba
RG
2683 if (!cs->cpus.empty()) {
2684 mapThreadToCPUList(t1.native_handle(), cs->cpus);
2685 }
a36ce055 2686 t1.detach();
652a7355 2687 }
a36ce055 2688 else if (cs->tcpFD >= 0) {
6e9fd124 2689 thread t1(tcpAcceptorThread, cs.get());
f0e4dcba
RG
2690 if (!cs->cpus.empty()) {
2691 mapThreadToCPUList(t1.native_handle(), cs->cpus);
2692 }
a36ce055 2693 t1.detach();
726ddf60 2694 }
24d5cb00 2695 }
7730131a 2696
42fae326 2697 thread carbonthread(carbonDumpThread);
2698 carbonthread.detach();
2699
3c115e0f 2700 thread stattid(maintThread);
886e2cf2 2701 stattid.detach();
6d01c80c 2702
886e2cf2
RG
2703 thread healththread(healthChecksThread);
2704
5d4e1ef8
RG
2705 if (!g_secPollSuffix.empty()) {
2706 thread secpollthread(secPollThread);
2707 secpollthread.detach();
2708 }
2709
b82a127e 2710 if(g_cmdLine.beSupervised) {
6ab65223
PL
2711#ifdef HAVE_SYSTEMD
2712 sd_notify(0, "READY=1");
2713#endif
886e2cf2 2714 healththread.join();
773470ca 2715 }
6d01c80c 2716 else {
886e2cf2 2717 healththread.detach();
505ca3d1 2718 doConsole();
3c115e0f 2719 }
9cf811d1 2720 _exit(EXIT_SUCCESS);
3c115e0f 2721
6d01c80c 2722}
3f5c3f1d
PD
2723catch(const LuaContext::ExecutionErrorException& e) {
2724 try {
2725 errlog("Fatal Lua error: %s", e.what());
2726 std::rethrow_if_nested(e);
2010ac95
RG
2727 } catch(const std::exception& ne) {
2728 errlog("Details: %s", ne.what());
3f5c3f1d
PD
2729 }
2730 catch(PDNSException &ae)
2731 {
2732 errlog("Fatal pdns error: %s", ae.reason);
2733 }
2734 _exit(EXIT_FAILURE);
2735}
24d5cb00 2736catch(std::exception &e)
2737{
6d01c80c 2738 errlog("Fatal error: %s", e.what());
4a966472 2739 _exit(EXIT_FAILURE);
24d5cb00 2740}
3f81d239 2741catch(PDNSException &ae)
7730131a 2742{
6d01c80c 2743 errlog("Fatal pdns error: %s", ae.reason);
4a966472 2744 _exit(EXIT_FAILURE);
7730131a 2745}