2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29 #include <netinet/tcp.h>
31 #include <sys/resource.h>
34 #if defined (__OpenBSD__) || defined(__NetBSD__)
35 #include <readline/readline.h>
37 #include <editline/readline.h>
40 #include "dnsdist-systemd.hh"
42 #include <systemd/sd-daemon.h>
46 #include "dnsdist-cache.hh"
47 #include "dnsdist-console.hh"
48 #include "dnsdist-ecs.hh"
49 #include "dnsdist-healthchecks.hh"
50 #include "dnsdist-lua.hh"
51 #include "dnsdist-proxy-protocol.hh"
52 #include "dnsdist-rings.hh"
53 #include "dnsdist-secpoll.hh"
54 #include "dnsdist-xpf.hh"
57 #include "delaypipe.hh"
60 #include "dnsparser.hh"
61 #include "ednsoptions.hh"
65 #include "sodcrypto.hh"
67 #include "threadname.hh"
71 Receiver is currently single threaded
72 not *that* bad actually, but now that we are thread safe, might want to scale
76 Set of Rules, if one matches, it leads to an Action
77 Both rules and actions could conceivably be Lua based.
78 On the C++ side, both could be inherited from a class Rule and a class Action,
79 on the Lua side we can't do that. */
85 struct DNSDistStats g_stats
;
87 uint16_t g_maxOutstanding
{std::numeric_limits
<uint16_t>::max()};
88 uint32_t g_staleCacheEntriesTTL
{0};
90 bool g_allowEmptyResponse
{false};
92 GlobalStateHolder
<NetmaskGroup
> g_ACL
;
93 string g_outputBuffer
;
95 std::vector
<std::shared_ptr
<TLSFrontend
>> g_tlslocals
;
96 std::vector
<std::shared_ptr
<DOHFrontend
>> g_dohlocals
;
97 std::vector
<std::shared_ptr
<DNSCryptContext
>> g_dnsCryptLocals
;
99 shared_ptr
<BPFFilter
> g_defaultBPFFilter
;
100 std::vector
<std::shared_ptr
<DynBPFFilter
> > g_dynBPFFilters
;
101 #endif /* HAVE_EBPF */
102 std::vector
<std::unique_ptr
<ClientState
>> g_frontends
;
103 GlobalStateHolder
<pools_t
> g_pools
;
104 size_t g_udpVectorSize
{1};
106 /* UDP: the grand design. Per socket we listen on for incoming queries there is one thread.
107 Then we have a bunch of connected sockets for talking to downstream servers.
108 We send directly to those sockets.
110 For the return path, per downstream server we have a thread that listens to responses.
112 Per socket there is an array of 2^16 states, when we send out a packet downstream, we note
113 there the original requestor and the original id. The new ID is the offset in the array.
115 When an answer comes in on a socket, we look up the offset by the id, and lob it to the
118 IDs are assigned by atomic increments of the socket offset.
121 GlobalStateHolder
<vector
<DNSDistRuleAction
> > g_rulactions
;
122 GlobalStateHolder
<vector
<DNSDistResponseRuleAction
> > g_resprulactions
;
123 GlobalStateHolder
<vector
<DNSDistResponseRuleAction
> > g_cachehitresprulactions
;
124 GlobalStateHolder
<vector
<DNSDistResponseRuleAction
> > g_selfansweredresprulactions
;
129 GlobalStateHolder
<servers_t
> g_dstates
;
130 GlobalStateHolder
<NetmaskTree
<DynBlock
>> g_dynblockNMG
;
131 GlobalStateHolder
<SuffixMatchTree
<DynBlock
>> g_dynblockSMT
;
132 DNSAction::Action g_dynBlockAction
= DNSAction::Action::Drop
;
133 int g_tcpRecvTimeout
{2};
134 int g_tcpSendTimeout
{2};
137 bool g_servFailOnNoPolicy
{false};
138 bool g_truncateTC
{false};
139 bool g_fixupCase
{false};
140 bool g_preserveTrailingData
{false};
142 std::set
<std::string
> g_capabilitiesToRetain
;
144 static void truncateTC(char* packet
, uint16_t* len
, size_t responseSize
, unsigned int consumed
)
147 bool hadEDNS
= false;
148 uint16_t payloadSize
= 0;
151 if (g_addEDNSToSelfGeneratedResponses
) {
152 hadEDNS
= getEDNSUDPPayloadSizeAndZ(packet
, *len
, &payloadSize
, &z
);
155 *len
=static_cast<uint16_t>(sizeof(dnsheader
)+consumed
+DNS_TYPE_SIZE
+DNS_CLASS_SIZE
);
156 struct dnsheader
* dh
= reinterpret_cast<struct dnsheader
*>(packet
);
157 dh
->ancount
= dh
->arcount
= dh
->nscount
= 0;
160 addEDNS(dh
, *len
, responseSize
, z
& EDNS_HEADER_FLAG_DO
, payloadSize
, 0);
172 ComboAddress destination
;
173 ComboAddress origDest
;
177 if(origDest
.sin4
.sin_family
== 0) {
178 res
= sendto(fd
, packet
.c_str(), packet
.size(), 0, (struct sockaddr
*)&destination
, destination
.getSocklen());
181 res
= sendfromto(fd
, packet
.c_str(), packet
.size(), 0, origDest
, destination
);
185 vinfolog("Error sending delayed response to %s: %s", destination
.toStringWithPort(), strerror(err
));
190 DelayPipe
<DelayedPacket
>* g_delay
= nullptr;
192 std::string
DNSQuestion::getTrailingData() const
194 const char* message
= reinterpret_cast<const char*>(this->dh
);
195 const uint16_t messageLen
= getDNSPacketLength(message
, this->len
);
196 return std::string(message
+ messageLen
, this->len
- messageLen
);
199 bool DNSQuestion::setTrailingData(const std::string
& tail
)
201 char* message
= reinterpret_cast<char*>(this->dh
);
202 const uint16_t messageLen
= getDNSPacketLength(message
, this->len
);
203 const uint16_t tailLen
= tail
.size();
204 if (tailLen
> (this->size
- messageLen
)) {
208 /* Update length and copy data from the Lua string. */
209 this->len
= messageLen
+ tailLen
;
211 tail
.copy(message
+ messageLen
, tailLen
);
216 void doLatencyStats(double udiff
)
218 if(udiff
< 1000) ++g_stats
.latency0_1
;
219 else if(udiff
< 10000) ++g_stats
.latency1_10
;
220 else if(udiff
< 50000) ++g_stats
.latency10_50
;
221 else if(udiff
< 100000) ++g_stats
.latency50_100
;
222 else if(udiff
< 1000000) ++g_stats
.latency100_1000
;
223 else ++g_stats
.latencySlow
;
224 g_stats
.latencySum
+= udiff
/ 1000;
226 auto doAvg
= [](double& var
, double n
, double weight
) {
227 var
= (weight
-1) * var
/weight
+ n
/weight
;
230 doAvg(g_stats
.latencyAvg100
, udiff
, 100);
231 doAvg(g_stats
.latencyAvg1000
, udiff
, 1000);
232 doAvg(g_stats
.latencyAvg10000
, udiff
, 10000);
233 doAvg(g_stats
.latencyAvg1000000
, udiff
, 1000000);
236 bool responseContentMatches(const char* response
, const uint16_t responseLen
, const DNSName
& qname
, const uint16_t qtype
, const uint16_t qclass
, const ComboAddress
& remote
, unsigned int& consumed
)
238 if (responseLen
< sizeof(dnsheader
)) {
242 const struct dnsheader
* dh
= reinterpret_cast<const struct dnsheader
*>(response
);
244 ++g_stats
.nonCompliantResponses
;
248 if (dh
->qdcount
== 0) {
249 if ((dh
->rcode
!= RCode::NoError
&& dh
->rcode
!= RCode::NXDomain
) || g_allowEmptyResponse
) {
253 ++g_stats
.nonCompliantResponses
;
258 uint16_t rqtype
, rqclass
;
261 rqname
=DNSName(response
, responseLen
, sizeof(dnsheader
), false, &rqtype
, &rqclass
, &consumed
);
263 catch(const std::exception
& e
) {
264 if(responseLen
> 0 && static_cast<size_t>(responseLen
) > sizeof(dnsheader
)) {
265 infolog("Backend %s sent us a response with id %d that did not parse: %s", remote
.toStringWithPort(), ntohs(dh
->id
), e
.what());
267 ++g_stats
.nonCompliantResponses
;
271 if (rqtype
!= qtype
|| rqclass
!= qclass
|| rqname
!= qname
) {
278 static void restoreFlags(struct dnsheader
* dh
, uint16_t origFlags
)
280 static const uint16_t rdMask
= 1 << FLAGS_RD_OFFSET
;
281 static const uint16_t cdMask
= 1 << FLAGS_CD_OFFSET
;
282 static const uint16_t restoreFlagsMask
= UINT16_MAX
& ~(rdMask
| cdMask
);
283 uint16_t * flags
= getFlagsFromDNSHeader(dh
);
284 /* clear the flags we are about to restore */
285 *flags
&= restoreFlagsMask
;
286 /* only keep the flags we want to restore */
287 origFlags
&= ~restoreFlagsMask
;
288 /* set the saved flags as they were */
292 static bool fixUpQueryTurnedResponse(DNSQuestion
& dq
, const uint16_t origFlags
)
294 restoreFlags(dq
.dh
, origFlags
);
296 return addEDNSToQueryTurnedResponse(dq
);
299 static bool fixUpResponse(char** response
, uint16_t* responseLen
, size_t* responseSize
, const DNSName
& qname
, uint16_t origFlags
, bool ednsAdded
, bool ecsAdded
, std::vector
<uint8_t>& rewrittenResponse
, uint16_t addRoom
, bool* zeroScope
)
301 if (*responseLen
< sizeof(dnsheader
)) {
305 struct dnsheader
* dh
= reinterpret_cast<struct dnsheader
*>(*response
);
306 restoreFlags(dh
, origFlags
);
308 if (*responseLen
== sizeof(dnsheader
)) {
313 string realname
= qname
.toDNSString();
314 if (*responseLen
>= (sizeof(dnsheader
) + realname
.length())) {
315 memcpy(*response
+ sizeof(dnsheader
), realname
.c_str(), realname
.length());
319 if (ednsAdded
|| ecsAdded
) {
324 const std::string
responseStr(*response
, *responseLen
);
325 int res
= locateEDNSOptRR(responseStr
, &optStart
, &optLen
, &last
);
328 if (zeroScope
) { // this finds if an EDNS Client Subnet scope was set, and if it is 0
329 size_t optContentStart
= 0;
330 uint16_t optContentLen
= 0;
331 /* we need at least 4 bytes after the option length (family: 2, source prefix-length: 1, scope prefix-length: 1) */
332 if (isEDNSOptionInOpt(responseStr
, optStart
, optLen
, EDNSOptionCode::ECS
, &optContentStart
, &optContentLen
) && optContentLen
>= 4) {
333 /* see if the EDNS Client Subnet SCOPE PREFIX-LENGTH byte in position 3 is set to 0, which is the only thing
335 *zeroScope
= responseStr
.at(optContentStart
+ 3) == 0;
340 /* we added the entire OPT RR,
341 therefore we need to remove it entirely */
343 /* simply remove the last AR */
344 *responseLen
-= optLen
;
345 uint16_t arcount
= ntohs(dh
->arcount
);
347 dh
->arcount
= htons(arcount
);
350 /* Removing an intermediary RR could lead to compression error */
351 if (rewriteResponseWithoutEDNS(responseStr
, rewrittenResponse
) == 0) {
352 *responseLen
= rewrittenResponse
.size();
353 if (addRoom
&& (UINT16_MAX
- *responseLen
) > addRoom
) {
354 rewrittenResponse
.reserve(*responseLen
+ addRoom
);
356 *responseSize
= rewrittenResponse
.capacity();
357 *response
= reinterpret_cast<char*>(rewrittenResponse
.data());
360 warnlog("Error rewriting content");
365 /* the OPT RR was already present, but without ECS,
366 we need to remove the ECS option if any */
368 /* nothing after the OPT RR, we can simply remove the
370 size_t existingOptLen
= optLen
;
371 removeEDNSOptionFromOPT(*response
+ optStart
, &optLen
, EDNSOptionCode::ECS
);
372 *responseLen
-= (existingOptLen
- optLen
);
375 /* Removing an intermediary RR could lead to compression error */
376 if (rewriteResponseWithoutEDNSOption(responseStr
, EDNSOptionCode::ECS
, rewrittenResponse
) == 0) {
377 *responseLen
= rewrittenResponse
.size();
378 if (addRoom
&& (UINT16_MAX
- *responseLen
) > addRoom
) {
379 rewrittenResponse
.reserve(*responseLen
+ addRoom
);
381 *responseSize
= rewrittenResponse
.capacity();
382 *response
= reinterpret_cast<char*>(rewrittenResponse
.data());
385 warnlog("Error rewriting content");
396 static bool encryptResponse(char* response
, uint16_t* responseLen
, size_t responseSize
, bool tcp
, std::shared_ptr
<DNSCryptQuery
> dnsCryptQuery
, dnsheader
** dh
, dnsheader
* dhCopy
)
399 uint16_t encryptedResponseLen
= 0;
401 /* save the original header before encrypting it in place */
402 if (dh
!= nullptr && *dh
!= nullptr && dhCopy
!= nullptr) {
403 memcpy(dhCopy
, *dh
, sizeof(dnsheader
));
407 int res
= dnsCryptQuery
->encryptResponse(response
, *responseLen
, responseSize
, tcp
, &encryptedResponseLen
);
409 *responseLen
= encryptedResponseLen
;
411 /* dropping response */
412 vinfolog("Error encrypting the response, dropping.");
418 #endif /* HAVE_DNSCRYPT */
420 static bool applyRulesToResponse(LocalStateHolder
<vector
<DNSDistResponseRuleAction
> >& localRespRulactions
, DNSResponse
& dr
)
422 DNSResponseAction::Action action
=DNSResponseAction::Action::None
;
423 std::string ruleresult
;
424 for(const auto& lr
: *localRespRulactions
) {
425 if(lr
.d_rule
->matches(&dr
)) {
426 lr
.d_rule
->d_matches
++;
427 action
=(*lr
.d_action
)(&dr
, &ruleresult
);
429 case DNSResponseAction::Action::Allow
:
432 case DNSResponseAction::Action::Drop
:
435 case DNSResponseAction::Action::HeaderModify
:
438 case DNSResponseAction::Action::ServFail
:
439 dr
.dh
->rcode
= RCode::ServFail
;
442 /* non-terminal actions follow */
443 case DNSResponseAction::Action::Delay
:
444 dr
.delayMsec
= static_cast<int>(pdns_stou(ruleresult
)); // sorry
446 case DNSResponseAction::Action::None
:
455 bool processResponse(char** response
, uint16_t* responseLen
, size_t* responseSize
, LocalStateHolder
<vector
<DNSDistResponseRuleAction
> >& localRespRulactions
, DNSResponse
& dr
, size_t addRoom
, std::vector
<uint8_t>& rewrittenResponse
, bool muted
)
457 if (!applyRulesToResponse(localRespRulactions
, dr
)) {
461 bool zeroScope
= false;
462 if (!fixUpResponse(response
, responseLen
, responseSize
, *dr
.qname
, dr
.origFlags
, dr
.ednsAdded
, dr
.ecsAdded
, rewrittenResponse
, addRoom
, dr
.useZeroScope
? &zeroScope
: nullptr)) {
466 if (dr
.packetCache
&& !dr
.skipCache
&& *responseLen
<= s_maxPacketCacheEntrySize
) {
467 if (!dr
.useZeroScope
) {
468 /* if the query was not suitable for zero-scope, for
469 example because it had an existing ECS entry so the hash is
470 not really 'no ECS', so just insert it for the existing subnet
472 - we don't have the correct hash for a non-ECS query
473 - inserting with hash computed before the ECS replacement but with
474 the subnet extracted _after_ the replacement would not work.
478 // if zeroScope, pass the pre-ECS hash-key and do not pass the subnet to the cache
479 dr
.packetCache
->insert(zeroScope
? dr
.cacheKeyNoECS
: dr
.cacheKey
, zeroScope
? boost::none
: dr
.subnet
, dr
.origFlags
, dr
.dnssecOK
, *dr
.qname
, dr
.qtype
, dr
.qclass
, *response
, *responseLen
, dr
.tcp
, dr
.dh
->rcode
, dr
.tempFailureTTL
);
484 if (!encryptResponse(*response
, responseLen
, *responseSize
, dr
.tcp
, dr
.dnsCryptQuery
, nullptr, nullptr)) {
488 #endif /* HAVE_DNSCRYPT */
493 static bool sendUDPResponse(int origFD
, const char* response
, const uint16_t responseLen
, const int delayMsec
, const ComboAddress
& origDest
, const ComboAddress
& origRemote
)
495 if(delayMsec
&& g_delay
) {
496 DelayedPacket dp
{origFD
, string(response
,responseLen
), origRemote
, origDest
};
497 g_delay
->submit(dp
, delayMsec
);
501 if(origDest
.sin4
.sin_family
== 0) {
502 res
= sendto(origFD
, response
, responseLen
, 0, reinterpret_cast<const struct sockaddr
*>(&origRemote
), origRemote
.getSocklen());
505 res
= sendfromto(origFD
, response
, responseLen
, 0, origDest
, origRemote
);
509 vinfolog("Error sending response to %s: %s", origRemote
.toStringWithPort(), stringerror(err
));
517 int pickBackendSocketForSending(std::shared_ptr
<DownstreamState
>& state
)
519 return state
->sockets
[state
->socketsOffset
++ % state
->sockets
.size()];
522 static void pickBackendSocketsReadyForReceiving(const std::shared_ptr
<DownstreamState
>& state
, std::vector
<int>& ready
)
526 if (state
->sockets
.size() == 1) {
527 ready
.push_back(state
->sockets
[0]);
532 std::lock_guard
<std::mutex
> lock(state
->socketsLock
);
533 state
->mplexer
->getAvailableFDs(ready
, -1);
537 // listens on a dedicated socket, lobs answers from downstream servers to original requestors
538 void responderThread(std::shared_ptr
<DownstreamState
> dss
)
540 setThreadName("dnsdist/respond");
541 auto localRespRulactions
= g_resprulactions
.getLocal();
542 char packet
[s_maxPacketCacheEntrySize
+ DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE
];
543 static_assert(sizeof(packet
) <= UINT16_MAX
, "Packet size should fit in a uint16_t");
544 /* when the answer is encrypted in place, we need to get a copy
545 of the original header before encryption to fill the ring buffer */
546 dnsheader cleartextDH
;
547 vector
<uint8_t> rewrittenResponse
;
549 uint16_t queryId
= 0;
550 std::vector
<int> sockets
;
551 sockets
.reserve(dss
->sockets
.size());
554 dnsheader
* dh
= reinterpret_cast<struct dnsheader
*>(packet
);
556 pickBackendSocketsReadyForReceiving(dss
, sockets
);
557 for (const auto& fd
: sockets
) {
558 ssize_t got
= recv(fd
, packet
, sizeof(packet
), 0);
559 char * response
= packet
;
560 size_t responseSize
= sizeof(packet
);
562 if (got
< 0 || static_cast<size_t>(got
) < sizeof(dnsheader
))
565 uint16_t responseLen
= static_cast<uint16_t>(got
);
568 if(queryId
>= dss
->idStates
.size()) {
572 IDState
* ids
= &dss
->idStates
[queryId
];
573 int64_t usageIndicator
= ids
->usageIndicator
;
575 if(!IDState::isInUse(usageIndicator
)) {
576 /* the corresponding state is marked as not in use, meaning that:
577 - it was already cleaned up by another thread and the state is gone ;
578 - we already got a response for this query and this one is a duplicate.
579 Either way, we don't touch it.
584 /* read the potential DOHUnit state as soon as possible, but don't use it
585 until we have confirmed that we own this state by updating usageIndicator */
587 /* setting age to 0 to prevent the maintainer thread from
588 cleaning this IDS while we process the response.
591 int origFD
= ids
->origFD
;
593 unsigned int consumed
= 0;
594 if (!responseContentMatches(response
, responseLen
, ids
->qname
, ids
->qtype
, ids
->qclass
, dss
->remote
, consumed
)) {
598 bool isDoH
= du
!= nullptr;
599 /* atomically mark the state as available, but only if it has not been altered
601 if (ids
->tryMarkUnused(usageIndicator
)) {
602 /* clear the potential DOHUnit asap, it's ours now
603 and since we just marked the state as unused,
604 someone could overwrite it. */
606 /* we only decrement the outstanding counter if the value was not
607 altered in the meantime, which would mean that the state has been actively reused
608 and the other thread has not incremented the outstanding counter, so we don't
609 want it to be decremented twice. */
610 --dss
->outstanding
; // you'd think an attacker could game this, but we're using connected socket
612 /* someone updated the state in the meantime, we can't touch the existing pointer */
614 /* since the state has been updated, we can't safely access it so let's just drop
619 if(dh
->tc
&& g_truncateTC
) {
620 truncateTC(response
, &responseLen
, responseSize
, consumed
);
623 dh
->id
= ids
->origID
;
625 uint16_t addRoom
= 0;
626 DNSResponse dr
= makeDNSResponseFromIDState(*ids
, dh
, sizeof(packet
), responseLen
, false);
627 if (dr
.dnsCryptQuery
) {
628 addRoom
= DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE
;
631 memcpy(&cleartextDH
, dr
.dh
, sizeof(cleartextDH
));
632 if (!processResponse(&response
, &responseLen
, &responseSize
, localRespRulactions
, dr
, addRoom
, rewrittenResponse
, ids
->cs
&& ids
->cs
->muted
)) {
636 if (ids
->cs
&& !ids
->cs
->muted
) {
638 #ifdef HAVE_DNS_OVER_HTTPS
640 du
->response
= std::string(response
, responseLen
);
641 if (send(du
->rsock
, &du
, sizeof(du
), 0) != sizeof(du
)) {
642 /* at this point we have the only remaining pointer on this
643 DOHUnit object since we did set ids->du to nullptr earlier,
644 except if we got the response before the pointer could be
645 released by the frontend */
648 #endif /* HAVE_DNS_OVER_HTTPS */
653 empty
.sin4
.sin_family
= 0;
654 /* if ids->destHarvested is false, origDest holds the listening address.
655 We don't want to use that as a source since it could be 0.0.0.0 for example. */
656 sendUDPResponse(origFD
, response
, responseLen
, dr
.delayMsec
, ids
->destHarvested
? ids
->origDest
: empty
, ids
->origRemote
);
662 ++ids
->cs
->responses
;
666 double udiff
= ids
->sentTime
.udiff();
667 vinfolog("Got answer from %s, relayed to %s%s, took %f usec", dss
->remote
.toStringWithPort(), ids
->origRemote
.toStringWithPort(),
668 isDoH
? " (https)": "", udiff
);
672 g_rings
.insertResponse(ts
, *dr
.remote
, *dr
.qname
, dr
.qtype
, static_cast<unsigned int>(udiff
), static_cast<unsigned int>(got
), cleartextDH
, dss
->remote
);
674 switch (cleartextDH
.rcode
) {
675 case RCode::NXDomain
:
676 ++g_stats
.frontendNXDomain
;
678 case RCode::ServFail
:
679 ++g_stats
.servfailResponses
;
680 ++g_stats
.frontendServFail
;
683 ++g_stats
.frontendNoError
;
686 dss
->latencyUsec
= (127.0 * dss
->latencyUsec
/ 128.0) + udiff
/128.0;
688 doLatencyStats(udiff
);
690 rewrittenResponse
.clear();
693 catch(const std::exception
& e
){
694 vinfolog("Got an error in UDP responder thread while parsing a response from %s, id %d: %s", dss
->remote
.toStringWithPort(), queryId
, e
.what());
698 catch(const std::exception
& e
)
700 errlog("UDP responder thread died because of exception: %s", e
.what());
702 catch(const PDNSException
& e
)
704 errlog("UDP responder thread died because of PowerDNS exception: %s", e
.reason
);
708 errlog("UDP responder thread died because of an exception: %s", "unknown");
711 std::mutex g_luamutex
;
713 ComboAddress g_serverControl
{"127.0.0.1:5199"};
716 static void spoofResponseFromString(DNSQuestion
& dq
, const string
& spoofContent
, bool raw
)
721 SpoofAction
sa(spoofContent
);
725 std::vector
<std::string
> addrs
;
726 stringtok(addrs
, spoofContent
, " ,");
728 if (addrs
.size() == 1) {
730 ComboAddress
spoofAddr(spoofContent
);
731 SpoofAction
sa({spoofAddr
});
734 catch(const PDNSException
&e
) {
735 DNSName
cname(spoofContent
);
736 SpoofAction
sa(cname
); // CNAME then
740 std::vector
<ComboAddress
> cas
;
741 for (const auto& addr
: addrs
) {
743 cas
.push_back(ComboAddress(addr
));
754 bool processRulesResult(const DNSAction::Action
& action
, DNSQuestion
& dq
, std::string
& ruleresult
, bool& drop
)
757 case DNSAction::Action::Allow
:
760 case DNSAction::Action::Drop
:
765 case DNSAction::Action::Nxdomain
:
766 dq
.dh
->rcode
= RCode::NXDomain
;
768 ++g_stats
.ruleNXDomain
;
771 case DNSAction::Action::Refused
:
772 dq
.dh
->rcode
= RCode::Refused
;
774 ++g_stats
.ruleRefused
;
777 case DNSAction::Action::ServFail
:
778 dq
.dh
->rcode
= RCode::ServFail
;
780 ++g_stats
.ruleServFail
;
783 case DNSAction::Action::Spoof
:
784 spoofResponseFromString(dq
, ruleresult
, false);
787 case DNSAction::Action::SpoofRaw
:
788 spoofResponseFromString(dq
, ruleresult
, true);
791 case DNSAction::Action::Truncate
:
794 dq
.dh
->ra
= dq
.dh
->rd
;
799 case DNSAction::Action::HeaderModify
:
802 case DNSAction::Action::Pool
:
803 dq
.poolname
=ruleresult
;
806 case DNSAction::Action::NoRecurse
:
810 /* non-terminal actions follow */
811 case DNSAction::Action::Delay
:
812 dq
.delayMsec
= static_cast<int>(pdns_stou(ruleresult
)); // sorry
814 case DNSAction::Action::None
:
816 case DNSAction::Action::NoOp
:
820 /* false means that we don't stop the processing */
825 static bool applyRulesToQuery(LocalHolders
& holders
, DNSQuestion
& dq
, const struct timespec
& now
)
827 g_rings
.insertQuery(now
, *dq
.remote
, *dq
.qname
, dq
.qtype
, dq
.len
, *dq
.dh
);
829 if(g_qcount
.enabled
) {
830 string qname
= (*dq
.qname
).toLogString();
831 bool countQuery
{true};
832 if(g_qcount
.filter
) {
833 std::lock_guard
<std::mutex
> lock(g_luamutex
);
834 std::tie (countQuery
, qname
) = g_qcount
.filter(&dq
);
838 WriteLock
wl(&g_qcount
.queryLock
);
839 if(!g_qcount
.records
.count(qname
)) {
840 g_qcount
.records
[qname
] = 0;
842 g_qcount
.records
[qname
]++;
846 if(auto got
= holders
.dynNMGBlock
->lookup(*dq
.remote
)) {
847 auto updateBlockStats
= [&got
]() {
848 ++g_stats
.dynBlocked
;
849 got
->second
.blocks
++;
852 if(now
< got
->second
.until
) {
853 DNSAction::Action action
= got
->second
.action
;
854 if (action
== DNSAction::Action::None
) {
855 action
= g_dynBlockAction
;
858 case DNSAction::Action::NoOp
:
862 case DNSAction::Action::Nxdomain
:
863 vinfolog("Query from %s turned into NXDomain because of dynamic block", dq
.remote
->toStringWithPort());
866 dq
.dh
->rcode
= RCode::NXDomain
;
870 case DNSAction::Action::Refused
:
871 vinfolog("Query from %s refused because of dynamic block", dq
.remote
->toStringWithPort());
874 dq
.dh
->rcode
= RCode::Refused
;
878 case DNSAction::Action::Truncate
:
881 vinfolog("Query from %s truncated because of dynamic block", dq
.remote
->toStringWithPort());
884 dq
.dh
->ra
= dq
.dh
->rd
;
890 vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq
.remote
->toStringWithPort(), dq
.qname
->toLogString());
893 case DNSAction::Action::NoRecurse
:
895 vinfolog("Query from %s setting rd=0 because of dynamic block", dq
.remote
->toStringWithPort());
900 vinfolog("Query from %s dropped because of dynamic block", dq
.remote
->toStringWithPort());
906 if(auto got
= holders
.dynSMTBlock
->lookup(*dq
.qname
)) {
907 auto updateBlockStats
= [&got
]() {
908 ++g_stats
.dynBlocked
;
912 if(now
< got
->until
) {
913 DNSAction::Action action
= got
->action
;
914 if (action
== DNSAction::Action::None
) {
915 action
= g_dynBlockAction
;
918 case DNSAction::Action::NoOp
:
921 case DNSAction::Action::Nxdomain
:
922 vinfolog("Query from %s for %s turned into NXDomain because of dynamic block", dq
.remote
->toStringWithPort(), dq
.qname
->toLogString());
925 dq
.dh
->rcode
= RCode::NXDomain
;
928 case DNSAction::Action::Refused
:
929 vinfolog("Query from %s for %s refused because of dynamic block", dq
.remote
->toStringWithPort(), dq
.qname
->toLogString());
932 dq
.dh
->rcode
= RCode::Refused
;
935 case DNSAction::Action::Truncate
:
939 vinfolog("Query from %s for %s truncated because of dynamic block", dq
.remote
->toStringWithPort(), dq
.qname
->toLogString());
942 dq
.dh
->ra
= dq
.dh
->rd
;
948 vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq
.remote
->toStringWithPort(), dq
.qname
->toLogString());
951 case DNSAction::Action::NoRecurse
:
953 vinfolog("Query from %s setting rd=0 because of dynamic block", dq
.remote
->toStringWithPort());
958 vinfolog("Query from %s for %s dropped because of dynamic block", dq
.remote
->toStringWithPort(), dq
.qname
->toLogString());
964 DNSAction::Action action
=DNSAction::Action::None
;
967 for(const auto& lr
: *holders
.rulactions
) {
968 if(lr
.d_rule
->matches(&dq
)) {
969 lr
.d_rule
->d_matches
++;
970 action
=(*lr
.d_action
)(&dq
, &ruleresult
);
971 if (processRulesResult(action
, dq
, ruleresult
, drop
)) {
984 ssize_t
udpClientSendRequestToBackend(const std::shared_ptr
<DownstreamState
>& ss
, const int sd
, const char* request
, const size_t requestLen
, bool healthCheck
)
988 if (ss
->sourceItf
== 0) {
989 result
= send(sd
, request
, requestLen
, 0);
994 cmsgbuf_aligned cbuf
;
995 ComboAddress
remote(ss
->remote
);
996 fillMSGHdr(&msgh
, &iov
, &cbuf
, sizeof(cbuf
), const_cast<char*>(request
), requestLen
, &remote
);
997 addCMsgSrcAddr(&msgh
, &cbuf
, &ss
->sourceAddr
, ss
->sourceItf
);
998 result
= sendmsg(sd
, &msgh
, 0);
1002 int savederrno
= errno
;
1003 vinfolog("Error sending request to backend %s: %d", ss
->remote
.toStringWithPort(), savederrno
);
1005 /* This might sound silly, but on Linux send() might fail with EINVAL
1006 if the interface the socket was bound to doesn't exist anymore.
1007 We don't want to reconnect the real socket if the healthcheck failed,
1008 because it's not using the same socket.
1010 if (!healthCheck
&& (savederrno
== EINVAL
|| savederrno
== ENODEV
)) {
1018 static bool isUDPQueryAcceptable(ClientState
& cs
, LocalHolders
& holders
, const struct msghdr
* msgh
, const ComboAddress
& remote
, ComboAddress
& dest
)
1020 if (msgh
->msg_flags
& MSG_TRUNC
) {
1021 /* message was too large for our buffer */
1022 vinfolog("Dropping message too large for our buffer");
1023 ++g_stats
.nonCompliantQueries
;
1027 if(!holders
.acl
->match(remote
)) {
1028 vinfolog("Query from %s dropped because of ACL", remote
.toStringWithPort());
1036 if (HarvestDestinationAddress(msgh
, &dest
)) {
1037 /* we don't get the port, only the address */
1038 dest
.sin4
.sin_port
= cs
.local
.sin4
.sin_port
;
1041 dest
.sin4
.sin_family
= 0;
1047 boost::optional
<std::vector
<uint8_t>> checkDNSCryptQuery(const ClientState
& cs
, const char* query
, uint16_t& len
, std::shared_ptr
<DNSCryptQuery
>& dnsCryptQuery
, time_t now
, bool tcp
)
1049 if (cs
.dnscryptCtx
) {
1050 #ifdef HAVE_DNSCRYPT
1051 vector
<uint8_t> response
;
1052 uint16_t decryptedQueryLen
= 0;
1054 dnsCryptQuery
= std::make_shared
<DNSCryptQuery
>(cs
.dnscryptCtx
);
1056 bool decrypted
= handleDNSCryptQuery(const_cast<char*>(query
), len
, dnsCryptQuery
, &decryptedQueryLen
, tcp
, now
, response
);
1059 if (response
.size() > 0) {
1062 throw std::runtime_error("Unable to decrypt DNSCrypt query, dropping.");
1065 len
= decryptedQueryLen
;
1066 #endif /* HAVE_DNSCRYPT */
1071 bool checkQueryHeaders(const struct dnsheader
* dh
)
1073 if (dh
->qr
) { // don't respond to responses
1074 ++g_stats
.nonCompliantQueries
;
1078 if (dh
->qdcount
== 0) {
1079 ++g_stats
.emptyQueries
;
1084 ++g_stats
.rdQueries
;
1090 #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
1091 static void queueResponse(const ClientState
& cs
, const char* response
, uint16_t responseLen
, const ComboAddress
& dest
, const ComboAddress
& remote
, struct mmsghdr
& outMsg
, struct iovec
* iov
, cmsgbuf_aligned
* cbuf
)
1094 fillMSGHdr(&outMsg
.msg_hdr
, iov
, nullptr, 0, const_cast<char*>(response
), responseLen
, const_cast<ComboAddress
*>(&remote
));
1096 if (dest
.sin4
.sin_family
== 0) {
1097 outMsg
.msg_hdr
.msg_control
= nullptr;
1100 addCMsgSrcAddr(&outMsg
.msg_hdr
, cbuf
, &dest
, 0);
1103 #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
1105 /* self-generated responses or cache hits */
1106 static bool prepareOutgoingResponse(LocalHolders
& holders
, ClientState
& cs
, DNSQuestion
& dq
, bool cacheHit
)
1108 DNSResponse
dr(dq
.qname
, dq
.qtype
, dq
.qclass
, dq
.consumed
, dq
.local
, dq
.remote
, reinterpret_cast<dnsheader
*>(dq
.dh
), dq
.size
, dq
.len
, dq
.tcp
, dq
.queryTime
);
1110 #ifdef HAVE_PROTOBUF
1111 dr
.uniqueId
= dq
.uniqueId
;
1114 dr
.delayMsec
= dq
.delayMsec
;
1116 if (!applyRulesToResponse(cacheHit
? holders
.cacheHitRespRulactions
: holders
.selfAnsweredRespRulactions
, dr
)) {
1120 /* in case a rule changed it */
1121 dq
.delayMsec
= dr
.delayMsec
;
1123 #ifdef HAVE_DNSCRYPT
1125 if (!encryptResponse(reinterpret_cast<char*>(dq
.dh
), &dq
.len
, dq
.size
, dq
.tcp
, dq
.dnsCryptQuery
, nullptr, nullptr)) {
1129 #endif /* HAVE_DNSCRYPT */
1132 ++g_stats
.cacheHits
;
1135 switch (dr
.dh
->rcode
) {
1136 case RCode::NXDomain
:
1137 ++g_stats
.frontendNXDomain
;
1139 case RCode::ServFail
:
1140 ++g_stats
.frontendServFail
;
1142 case RCode::NoError
:
1143 ++g_stats
.frontendNoError
;
1147 doLatencyStats(0); // we're not going to measure this
// processQuery: runs one client query through the query rules, the packet
// cache and backend selection, and tells the caller what to do next.
// Outcomes visible here: ProcessQueryResult::Drop, ::SendAnswer (the buffer
// behind dq.dh now holds a response) and ::PassToBackend (selectedBackend
// holds the server picked by the policy).
// A std::exception raised along the way is logged and turned into Drop.
// NOTE(review): this listing has lost some source lines (see the gaps in the
// embedded numbers), so the brace and try structure is not visible here.
1151 ProcessQueryResult
processQuery(DNSQuestion
& dq
, ClientState
& cs
, LocalHolders
& holders
, std::shared_ptr
<DownstreamState
>& selectedBackend
)
// keep the ID in host byte order; it is only used by the error log below
1153 const uint16_t queryId
= ntohs(dq
.dh
->id
);
1156 /* we need an accurate ("real") value for the response and
1157 to store into the IDS, but not for insertion into the
1158 rings for example */
1159 struct timespec now
;
1162 if (!applyRulesToQuery(holders
, dq
, now
)) {
1163 return ProcessQueryResult::Drop
;
1166 if(dq
.dh
->qr
) { // something turned it into a response
1167 fixUpQueryTurnedResponse(dq
, dq
.origFlags
);
1169 if (!prepareOutgoingResponse(holders
, cs
, dq
, false)) {
1170 return ProcessQueryResult::Drop
;
1173 ++g_stats
.selfAnswered
;
1175 return ProcessQueryResult::SendAnswer
;
// look up the pool named by dq.poolname and adopt its packet cache
1178 std::shared_ptr
<ServerPool
> serverPool
= getPool(*holders
.pools
, dq
.poolname
);
1179 dq
.packetCache
= serverPool
->packetCache
;
// start from the global policy; a policy set on the pool replaces it
1180 auto policy
= *(holders
.policy
);
1181 if (serverPool
->policy
!= nullptr) {
1182 policy
= *(serverPool
->policy
);
1184 auto servers
= serverPool
->getServers();
1185 selectedBackend
= getSelectedBackendFromPolicy(policy
, servers
, dq
);
1187 uint16_t cachedResponseSize
= dq
.size
;
// expired cache entries are only accepted when no backend was selected
1188 uint32_t allowExpired
= selectedBackend
? 0 : g_staleCacheEntriesTTL
;
1190 if (dq
.packetCache
&& !dq
.skipCache
) {
1191 dq
.dnssecOK
= (getEDNSZ(dq
) & EDNS_HEADER_FLAG_DO
);
1194 if (dq
.useECS
&& ((selectedBackend
&& selectedBackend
->useECS
) || (!selectedBackend
&& serverPool
->getECS()))) {
1195 // we special case our cache in case a downstream explicitly gave us a universally valid response with a 0 scope
1196 // we need ECS parsing (parseECS) to be true so we can be sure that the initial incoming query did not have an existing
1197 // ECS option, which would make it unsuitable for the zero-scope feature.
1198 if (dq
.packetCache
&& !dq
.skipCache
&& (!selectedBackend
|| !selectedBackend
->disableZeroScope
) && dq
.packetCache
->isECSParsingEnabled()) {
// first lookup, done before ECS is added; the key is kept in dq.cacheKeyNoECS
1199 if (dq
.packetCache
->get(dq
, dq
.consumed
, dq
.dh
->id
, reinterpret_cast<char*>(dq
.dh
), &cachedResponseSize
, &dq
.cacheKeyNoECS
, dq
.subnet
, dq
.dnssecOK
, allowExpired
)) {
1200 dq
.len
= cachedResponseSize
;
1202 if (!prepareOutgoingResponse(holders
, cs
, dq
, true)) {
1203 return ProcessQueryResult::Drop
;
1206 return ProcessQueryResult::SendAnswer
;
1210 /* there was no existing ECS on the query, enable the zero-scope feature */
1211 dq
.useZeroScope
= true;
1215 if (!handleEDNSClientSubnet(dq
, dq
.ednsAdded
, dq
.ecsAdded
, g_preserveTrailingData
)) {
1216 vinfolog("Dropping query from %s because we couldn't insert the ECS value", dq
.remote
->toStringWithPort());
1217 return ProcessQueryResult::Drop
;
// regular cache lookup; the key is kept in dq.cacheKey
1221 if (dq
.packetCache
&& !dq
.skipCache
) {
1222 if (dq
.packetCache
->get(dq
, dq
.consumed
, dq
.dh
->id
, reinterpret_cast<char*>(dq
.dh
), &cachedResponseSize
, &dq
.cacheKey
, dq
.subnet
, dq
.dnssecOK
, allowExpired
)) {
1223 dq
.len
= cachedResponseSize
;
1225 if (!prepareOutgoingResponse(holders
, cs
, dq
, true)) {
1226 return ProcessQueryResult::Drop
;
1229 return ProcessQueryResult::SendAnswer
;
1231 ++g_stats
.cacheMisses
;
// no backend available: the visible code answers ServFail when
// g_servFailOnNoPolicy is set and returns Drop otherwise
1234 if(!selectedBackend
) {
1237 vinfolog("%s query for %s|%s from %s, no policy applied", g_servFailOnNoPolicy
? "ServFailed" : "Dropped", dq
.qname
->toLogString(), QType(dq
.qtype
).getName(), dq
.remote
->toStringWithPort());
1238 if (g_servFailOnNoPolicy
) {
1239 restoreFlags(dq
.dh
, dq
.origFlags
);
1241 dq
.dh
->rcode
= RCode::ServFail
;
1244 if (!prepareOutgoingResponse(holders
, cs
, dq
, false)) {
1245 return ProcessQueryResult::Drop
;
1247 // no response-only statistics counter to update.
1248 return ProcessQueryResult::SendAnswer
;
1251 return ProcessQueryResult::Drop
;
// XPF is only added when requested on the query and the backend has a
// non-zero xpfRRCode
1254 if (dq
.addXPF
&& selectedBackend
->xpfRRCode
!= 0) {
1255 addXPF(dq
, selectedBackend
->xpfRRCode
, g_preserveTrailingData
);
// count the query against the chosen backend before handing it over
1258 selectedBackend
->queries
++;
1259 return ProcessQueryResult::PassToBackend
;
1261 catch(const std::exception
& e
){
1262 vinfolog("Got an error while parsing a %s query from %s, id %d: %s", (dq
.tcp
? "TCP" : "UDP"), dq
.remote
->toStringWithPort(), queryId
, e
.what());
1264 return ProcessQueryResult::Drop
;
// processUDPQuery: handles one UDP datagram received on frontend cs.
// Visible steps: acceptability check, DNSCrypt handling, header check,
// parsing of the question into a DNSQuestion, then processQuery. A SendAnswer
// result is either queued into responsesVect (batch mode, no delay) or sent
// with sendUDPResponse; a PassToBackend result reserves an IDState slot on
// the selected backend, records where the reply must go and relays the query.
// query/len describe the received payload, queryBufferSize the capacity of
// the buffer behind query. When responsesVect is non-null the assert requires
// queuedResponses, respIOV and respCBuf to be non-null too.
// NOTE(review): some source lines are missing from this listing (gaps in the
// embedded numbers), including early returns and brace/try lines.
1267 static void processUDPQuery(ClientState
& cs
, LocalHolders
& holders
, const struct msghdr
* msgh
, const ComboAddress
& remote
, ComboAddress
& dest
, char* query
, uint16_t len
, size_t queryBufferSize
, struct mmsghdr
* responsesVect
, unsigned int* queuedResponses
, struct iovec
* respIOV
, cmsgbuf_aligned
* respCBuf
)
1269 assert(responsesVect
== nullptr || (queuedResponses
!= nullptr && respIOV
!= nullptr && respCBuf
!= nullptr));
// stays 0 until the header has been read; only used by the error log
1270 uint16_t queryId
= 0;
1273 if (!isUDPQueryAcceptable(cs
, holders
, msgh
, remote
, dest
)) {
1277 /* we need an accurate ("real") value for the response and
1278 to store into the IDS, but not for insertion into the
1279 rings for example */
1280 struct timespec queryRealTime
;
1281 gettime(&queryRealTime
, true);
1283 std::shared_ptr
<DNSCryptQuery
> dnsCryptQuery
= nullptr;
// a non-empty result is sent straight back to the client
1284 auto dnsCryptResponse
= checkDNSCryptQuery(cs
, query
, len
, dnsCryptQuery
, queryRealTime
.tv_sec
, false);
1285 if (dnsCryptResponse
) {
1286 sendUDPResponse(cs
.udpFD
, reinterpret_cast<char*>(dnsCryptResponse
->data()), static_cast<uint16_t>(dnsCryptResponse
->size()), 0, dest
, remote
);
1290 struct dnsheader
* dh
= reinterpret_cast<struct dnsheader
*>(query
);
1291 queryId
= ntohs(dh
->id
);
1293 if (!checkQueryHeaders(dh
)) {
1297 uint16_t qtype
, qclass
;
1298 unsigned int consumed
= 0;
1299 DNSName
qname(query
, len
, sizeof(dnsheader
), false, &qtype
, &qclass
, &consumed
);
// the local address given to the question is dest when its family is set,
// otherwise the listening address of the frontend
1300 DNSQuestion
dq(&qname
, qtype
, qclass
, consumed
, dest
.sin4
.sin_family
!= 0 ? &dest
: &cs
.local
, &remote
, dh
, queryBufferSize
, len
, false, &queryRealTime
);
1301 dq
.dnsCryptQuery
= std::move(dnsCryptQuery
);
1302 std::shared_ptr
<DownstreamState
> ss
{nullptr};
1303 auto result
= processQuery(dq
, cs
, holders
, ss
);
1305 if (result
== ProcessQueryResult::Drop
) {
1309 if (result
== ProcessQueryResult::SendAnswer
) {
1310 #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
// batch mode: undelayed answers are queued for a later sendmmsg() call
1311 if (dq
.delayMsec
== 0 && responsesVect
!= nullptr) {
1312 queueResponse(cs
, reinterpret_cast<char*>(dq
.dh
), dq
.len
, *dq
.local
, *dq
.remote
, responsesVect
[*queuedResponses
], respIOV
, respCBuf
);
1313 (*queuedResponses
)++;
1316 #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
1317 /* we use dest, always, because we don't want to use the listening address to send a response since it could be 0.0.0.0 */
1318 sendUDPResponse(cs
.udpFD
, reinterpret_cast<char*>(dq
.dh
), dq
.len
, dq
.delayMsec
, dest
, *dq
.remote
);
1322 if (result
!= ProcessQueryResult::PassToBackend
|| ss
== nullptr) {
// pick the next IDState slot of the backend, wrapping around idStates
1326 unsigned int idOffset
= (ss
->idOffset
++) % ss
->idStates
.size();
1327 IDState
* ids
= &ss
->idStates
[idOffset
];
1329 DOHUnit
* du
= nullptr;
1331 /* that means that the state was in use, possibly with an allocated
1332 DOHUnit that we will need to handle, but we can't touch it before
1333 confirming that we now own this state */
1334 if (ids
->isInUse()) {
1338 /* we atomically replace the value, we now own this state */
1339 if (!ids
->markAsUsed()) {
1340 /* the state was not in use.
1341 we reset 'du' because it might have still been in use when we read it. */
1346 /* we are reusing a state, no change in outstanding but if there was an existing DOHUnit we need
1347 to handle it because it's about to be overwritten. */
1350 ++g_stats
.downstreamTimeouts
;
1351 handleDOHTimeout(du
);
// remember where the eventual response has to be sent back to
1355 ids
->origFD
= cs
.udpFD
;
1356 ids
->origID
= dh
->id
;
1357 setIDStateFromDNSQuestion(*ids
, dq
, std::move(qname
));
1359 /* If we couldn't harvest the real dest addr, still
1360 write down the listening addr since it will be useful
1361 (especially if it's not an 'any' one).
1362 We need to keep track of which one it is since we may
1363 want to use the real but not the listening addr to reply.
1365 if (dest
.sin4
.sin_family
!= 0) {
1366 ids
->origDest
= dest
;
1367 ids
->destHarvested
= true;
1370 ids
->origDest
= cs
.local
;
1371 ids
->destHarvested
= false;
1376 if (ss
->useProxyProtocol
) {
1377 addProxyProtocol(dq
);
// relay the query (dq.len bytes of the query buffer) over a backend socket
1380 int fd
= pickBackendSocketForSending(ss
);
1381 ssize_t ret
= udpClientSendRequestToBackend(ss
, fd
, query
, dq
.len
);
1385 ++g_stats
.downstreamSendErrors
;
1388 vinfolog("Got query for %s|%s from %s, relayed to %s", ids
->qname
.toLogString(), QType(ids
->qtype
).getName(), remote
.toStringWithPort(), ss
->getName());
1390 catch(const std::exception
& e
){
1391 vinfolog("Got an error in UDP question thread while parsing a query from %s, id %d: %s", remote
.toStringWithPort(), queryId
, e
.what());
// MultipleMessagesUDPClientThread: UDP frontend loop built on recvmmsg() and
// sendmmsg(); only compiled when both calls and MSG_WAITFORONE are available.
// It allocates g_udpVectorSize receive slots, receives a batch of datagrams,
// hands each one to processUDPQuery and then sends the responses queued by
// that function with a single sendmmsg() call.
// NOTE(review): some source lines are missing from this listing (for example
// the MMReceiver declaration header and the loop construct).
1395 #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
1396 static void MultipleMessagesUDPClientThread(ClientState
* cs
, LocalHolders
& holders
)
1400 char packet
[s_maxPacketCacheEntrySize
];
1401 ComboAddress remote
;
1404 /* used by HarvestDestinationAddress */
1405 cmsgbuf_aligned cbuf
;
1407 const size_t vectSize
= g_udpVectorSize
;
1408 /* the actual buffer is larger because:
1409 - we may have to add EDNS and/or ECS
1410 - we use it for self-generated responses (from rule or cache)
1411 but we only accept incoming payloads up to that size
1413 static_assert(s_udpIncomingBufferSize
<= sizeof(MMReceiver::packet
), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)");
// one receive slot, one incoming header and one outgoing header per message
1415 auto recvData
= std::unique_ptr
<MMReceiver
[]>(new MMReceiver
[vectSize
]);
1416 auto msgVec
= std::unique_ptr
<struct mmsghdr
[]>(new struct mmsghdr
[vectSize
]);
1417 auto outMsgVec
= std::unique_ptr
<struct mmsghdr
[]>(new struct mmsghdr
[vectSize
]);
1419 /* initialize the structures needed to receive our messages */
1420 for (size_t idx
= 0; idx
< vectSize
; idx
++) {
1421 recvData
[idx
].remote
.sin4
.sin_family
= cs
->local
.sin4
.sin_family
;
// the accepted payload size is the whole packet buffer for DNSCrypt
// frontends and s_udpIncomingBufferSize otherwise
1422 fillMSGHdr(&msgVec
[idx
].msg_hdr
, &recvData
[idx
].iov
, &recvData
[idx
].cbuf
, sizeof(recvData
[idx
].cbuf
), recvData
[idx
].packet
, cs
->dnscryptCtx
? sizeof(recvData
[idx
].packet
) : s_udpIncomingBufferSize
, &recvData
[idx
].remote
);
1428 /* reset the IO vector, since it's also used to send the vector of responses
1429 to avoid having to copy the data around */
1430 for (size_t idx
= 0; idx
< vectSize
; idx
++) {
1431 recvData
[idx
].iov
.iov_base
= recvData
[idx
].packet
;
1432 recvData
[idx
].iov
.iov_len
= sizeof(recvData
[idx
].packet
);
1435 /* block until we have at least one message ready, but return
1436 as many as possible to save the syscall costs */
1437 int msgsGot
= recvmmsg(cs
->udpFD
, msgVec
.get(), vectSize
, MSG_WAITFORONE
| MSG_TRUNC
, nullptr);
1440 vinfolog("Getting UDP messages via recvmmsg() failed with: %s", stringerror());
// number of responses queued into outMsgVec during this batch
1444 unsigned int msgsToSend
= 0;
1446 /* process the received messages */
1447 for (int msgIdx
= 0; msgIdx
< msgsGot
; msgIdx
++) {
1448 const struct msghdr
* msgh
= &msgVec
[msgIdx
].msg_hdr
;
1449 unsigned int got
= msgVec
[msgIdx
].msg_len
;
1450 const ComboAddress
& remote
= recvData
[msgIdx
].remote
;
// anything shorter than a DNS header is counted as non-compliant
1452 if (static_cast<size_t>(got
) < sizeof(struct dnsheader
)) {
1453 ++g_stats
.nonCompliantQueries
;
1457 processUDPQuery(*cs
, holders
, msgh
, remote
, recvData
[msgIdx
].dest
, recvData
[msgIdx
].packet
, static_cast<uint16_t>(got
), sizeof(recvData
[msgIdx
].packet
), outMsgVec
.get(), &msgsToSend
, &recvData
[msgIdx
].iov
, &recvData
[msgIdx
].cbuf
);
1461 /* immediate (not delayed or sent to a backend) responses (mostly from a rule, dynamic block
1462 or the cache) can be sent in batch too */
1464 if (msgsToSend
> 0 && msgsToSend
<= static_cast<unsigned int>(msgsGot
)) {
1465 int sent
= sendmmsg(cs
->udpFD
, outMsgVec
.get(), msgsToSend
, 0);
// a short or failed send is only logged
1467 if (sent
< 0 || static_cast<unsigned int>(sent
) != msgsToSend
) {
1468 vinfolog("Error sending responses with sendmmsg() (%d on %u): %s", sent
, msgsToSend
, stringerror());
1474 #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
// udpClientThread: thread body for the UDP side of frontend cs.
// When recvmmsg/sendmmsg support is compiled in and g_udpVectorSize is larger
// than 1 it calls MultipleMessagesUDPClientThread. The rest of the visible
// code reads one datagram at a time with recvmsg() and passes it to
// processUDPQuery with no response vector.
// Exceptions are reported through errlog as the thread dying.
// NOTE(review): some source lines are missing from this listing (for example
// the declarations of msgh, iov and dest, and the loop and try constructs).
1476 // listens to incoming queries, sends out to downstream servers, noting the intended return path
1477 static void udpClientThread(ClientState
* cs
)
1480 setThreadName("dnsdist/udpClie");
1481 LocalHolders holders
;
1483 #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
1484 if (g_udpVectorSize
> 1) {
1485 MultipleMessagesUDPClientThread(cs
, holders
);
1489 #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
1491 char packet
[s_maxPacketCacheEntrySize
];
1492 /* the actual buffer is larger because:
1493 - we may have to add EDNS and/or ECS
1494 - we use it for self-generated responses (from rule or cache)
1495 but we only accept incoming payloads up to that size
// NOTE(review): the assertion message below names MMReceiver::packet although
// the check is made against the local packet buffer
1497 static_assert(s_udpIncomingBufferSize
<= sizeof(packet
), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)");
1500 /* used by HarvestDestinationAddress */
1501 cmsgbuf_aligned cbuf
;
1503 ComboAddress remote
;
1505 remote
.sin4
.sin_family
= cs
->local
.sin4
.sin_family
;
// the accepted payload size is the whole packet buffer for DNSCrypt
// frontends and s_udpIncomingBufferSize otherwise
1506 fillMSGHdr(&msgh
, &iov
, &cbuf
, sizeof(cbuf
), packet
, cs
->dnscryptCtx
? sizeof(packet
) : s_udpIncomingBufferSize
, &remote
);
1509 ssize_t got
= recvmsg(cs
->udpFD
, &msgh
, 0);
// receive errors and payloads shorter than a DNS header are both counted as
// non-compliant queries
1511 if (got
< 0 || static_cast<size_t>(got
) < sizeof(struct dnsheader
)) {
1512 ++g_stats
.nonCompliantQueries
;
1516 processUDPQuery(*cs
, holders
, &msgh
, remote
, dest
, packet
, static_cast<uint16_t>(got
), sizeof(packet
), nullptr, nullptr, nullptr, nullptr);
1520 catch(const std::exception
&e
)
1522 errlog("UDP client thread died because of exception: %s", e
.what());
1524 catch(const PDNSException
&e
)
1526 errlog("UDP client thread died because of PowerDNS exception: %s", e
.reason
);
1530 errlog("UDP client thread died because of an exception: %s", "unknown");
// getRandomDNSID: returns a 16-bit value to be used as a DNS query ID.
// With HAVE_LIBSODIUM the value comes from randombytes_uniform(65536); the
// other visible return uses random() % 65536.
// NOTE(review): the directive separating the two returns is missing from this
// listing - presumably the else branch of the ifdef; confirm in the full file.
1533 uint16_t getRandomDNSID()
1535 #ifdef HAVE_LIBSODIUM
1536 return randombytes_uniform(65536);
1538 return (random() % 65536);
// Tunables, with the defaults visible here.
// g_maxTCPClientThreads (10): used in this file to size the
// TCPClientCollection and the file descriptor estimate.
1542 uint64_t g_maxTCPClientThreads
{10};
// g_cacheCleaningDelay (60): threshold compared against the counter kept by
// maintThread before it cleans the packet caches.
1543 std::atomic
<uint16_t> g_cacheCleaningDelay
{60};
// g_cacheCleaningPercentage (100): maintThread passes
// getMaxEntries() * (100 - this value) / 100 to purgeExpired.
1544 std::atomic
<uint16_t> g_cacheCleaningPercentage
{100};
// maintThread: periodic maintenance thread.
// Visible work: under g_luamutex it reads the optional Lua function stored
// in the global named maintenance; an exception is logged only when
// secondsToWaitLog has run down to zero, after which it is reset to 61 and
// decreased by interval. Once counter reaches g_cacheCleaningDelay it
// gathers the packet caches used by the pools and purges their expired
// entries, skipping caches flagged as having to keep stale data.
// NOTE(review): some source lines are missing from this listing (for example
// the declarations of interval and counter, the loop and the call of the Lua
// function), so the exact control flow is not visible here.
1546 static void maintThread()
1548 setThreadName("dnsdist/main");
1551 int32_t secondsToWaitLog
= 0;
1557 std::lock_guard
<std::mutex
> lock(g_luamutex
);
1558 auto f
= g_lua
.readVariable
<boost::optional
<std::function
<void()> > >("maintenance");
1562 secondsToWaitLog
= 0;
1564 catch(std::exception
&e
) {
1565 if (secondsToWaitLog
<= 0) {
1566 infolog("Error during execution of maintenance function: %s", e
.what());
1567 secondsToWaitLog
= 61;
1569 secondsToWaitLog
-= interval
;
1575 if (counter
>= g_cacheCleaningDelay
) {
1576 /* keep track, for each cache, of whether we should keep
// maps each cache to a flag; true means its expired entries are kept
1578 std::map
<std::shared_ptr
<DNSDistPacketCache
>, bool> caches
;
1580 /* gather all caches actually used by at least one pool, and see
1581 if something prevents us from cleaning the expired entries */
1582 auto localPools
= g_pools
.getLocal();
1583 for (const auto& entry
: *localPools
) {
1584 auto& pool
= entry
.second
;
1586 auto packetCache
= pool
->packetCache
;
// insert() leaves an existing entry untouched, so a flag already set to true
// by an earlier pool is preserved
1591 auto pair
= caches
.insert({packetCache
, false});
1592 auto& iter
= pair
.first
;
1593 /* if we need to keep stale data for this cache (ie, not clear
1594 expired entries when at least one pool using this cache
1595 has all its backends down) */
1596 if (packetCache
->keepStaleData() && iter
->second
== false) {
1597 /* so far all pools had at least one backend up */
1598 if (pool
->countServers(true) == 0) {
1599 iter
->second
= true;
// NOTE(review): iterating by value copies each map entry, including the
// shared_ptr key
1604 for (auto pair
: caches
) {
1605 /* shall we keep expired entries ? */
1606 if (pair
.second
== true) {
1609 auto& packetCache
= pair
.first
;
// upTo is getMaxEntries() scaled by (100 - g_cacheCleaningPercentage) / 100
1610 size_t upTo
= (packetCache
->getMaxEntries()* (100 - g_cacheCleaningPercentage
)) / 100;
1611 packetCache
->purgeExpired(upTo
);
1616 // ponder pruning g_dynblocks of expired entries here
// secPollThread: names the thread dnsdist/secpoll, calls
// doSecPoll(g_secPollSuffix) and then sleep(g_secPollInterval).
// NOTE(review): presumably both calls sit in an endless loop with error
// handling around doSecPoll, but those lines are missing from this listing.
1620 static void secPollThread()
1622 setThreadName("dnsdist/secpoll");
1626 doSecPoll(g_secPollSuffix
);
1630 sleep(g_secPollInterval
);
// healthChecksThread: periodic pass over the downstream servers in g_dstates.
// Visible work per pass:
//  - add a TCP client thread when more than one connection is queued and the
//    maximum number of threads has not been reached;
//  - for each backend, advance lastCheck and compare it with checkInterval,
//    queue a health check when availability is Auto (a failure to queue is
//    recorded as a failed check), and refresh queryLoad and dropRate;
//  - expire in-use IDState entries whose age exceeds g_udpTimeout: count a
//    downstream timeout, log it and record it in g_rings;
//  - run the queued health checks through handleQueuedHealthChecks.
// NOTE(review): some source lines are missing from this listing (for example
// the loop, the sleep and the declaration of ts), so ordering details between
// the steps above are not fully visible.
1634 static void healthChecksThread()
1636 setThreadName("dnsdist/healthC");
1638 static const int interval
= 1;
1643 if(g_tcpclientthreads
->getQueuedCount() > 1 && !g_tcpclientthreads
->hasReachedMaxThreads()) {
1644 g_tcpclientthreads
->addTCPClientThread();
1647 auto mplexer
= std::shared_ptr
<FDMultiplexer
>(FDMultiplexer::getMultiplexerSilent());
1648 auto states
= g_dstates
.getLocal(); // this points to the actual shared_ptrs!
1649 for(auto& dss
: *states
) {
1650 if(++dss
->lastCheck
< dss
->checkInterval
) {
1656 if (dss
->availability
== DownstreamState::Availability::Auto
) {
1657 if (!queueHealthCheck(mplexer
, dss
)) {
1658 updateHealthCheckResult(dss
, false);
// delta is dss->sw.udiffAndSet() divided by 1e6 (presumably microseconds
// converted to seconds); the two rates are counter differences over delta
1662 auto delta
= dss
->sw
.udiffAndSet()/1000000.0;
1663 dss
->queryLoad
= 1.0*(dss
->queries
.load() - dss
->prev
.queries
.load())/delta
;
1664 dss
->dropRate
= 1.0*(dss
->reuseds
.load() - dss
->prev
.reuseds
.load())/delta
;
1665 dss
->prev
.queries
.store(dss
->queries
.load());
1666 dss
->prev
.reuseds
.store(dss
->reuseds
.load());
1668 for (IDState
& ids
: dss
->idStates
) { // timeouts
// snapshot of the usage indicator, later handed to tryMarkUnused
1669 int64_t usageIndicator
= ids
.usageIndicator
;
1670 if(IDState::isInUse(usageIndicator
) && ids
.age
++ > g_udpTimeout
) {
1671 /* We mark the state as unused as soon as possible
1672 to limit the risk of racing with the
1675 auto oldDU
= ids
.du
;
1677 if (!ids
.tryMarkUnused(usageIndicator
)) {
1678 /* this state has been altered in the meantime,
1679 don't go anywhere near it */
1683 handleDOHTimeout(oldDU
);
1687 ++g_stats
.downstreamTimeouts
; // this is an 'actively' discovered timeout
1688 vinfolog("Had a downstream timeout from %s (%s) for query for %s|%s from %s",
1689 dss
->remote
.toStringWithPort(), dss
->getName(),
1690 ids
.qname
.toLogString(), QType(ids
.qtype
).getName(), ids
.origRemote
.toStringWithPort());
// zeroed header carrying only the original ID, used for the ring entry
1695 struct dnsheader fake
;
1696 memset(&fake
, 0, sizeof(fake
));
1697 fake
.id
= ids
.origID
;
1699 g_rings
.insertResponse(ts
, ids
.origRemote
, ids
.qname
, ids
.qtype
, std::numeric_limits
<unsigned int>::max(), 0, fake
, dss
->remote
);
1704 handleQueuedHealthChecks(mplexer
);
// bindAny: sets the IP_FREEBIND, IP_BINDANY, IPV6_BINDANY and SO_BINDANY
// socket options on sock, each with the value 1. A failing setsockopt call
// only produces a warning.
// NOTE(review): the preprocessor guards and any tests on af that select among
// these options are missing from this listing; the unused attribute on one
// suggests that some builds compile none of the calls.
1708 static void bindAny(int af
, int sock
)
1710 __attribute__((unused
)) int one
= 1;
1713 if (setsockopt(sock
, IPPROTO_IP
, IP_FREEBIND
, &one
, sizeof(one
)) < 0)
1714 warnlog("Warning: IP_FREEBIND setsockopt failed: %s", stringerror());
1719 if (setsockopt(sock
, IPPROTO_IP
, IP_BINDANY
, &one
, sizeof(one
)) < 0)
1720 warnlog("Warning: IP_BINDANY setsockopt failed: %s", stringerror());
1724 if (setsockopt(sock
, IPPROTO_IPV6
, IPV6_BINDANY
, &one
, sizeof(one
)) < 0)
1725 warnlog("Warning: IPV6_BINDANY setsockopt failed: %s", stringerror());
1728 if (setsockopt(sock
, SOL_SOCKET
, SO_BINDANY
, &one
, sizeof(one
)) < 0)
1729 warnlog("Warning: SO_BINDANY setsockopt failed: %s", stringerror());
// dropGroupPrivs: switches the process group ID to gid with setgid(); when
// that succeeds it also clears the supplementary groups with setgroups(0).
// Failures are only reported through warnlog.
// NOTE(review): lines are missing between the signature and the setgid call
// (embedded numbers 1734-1735), so an outer condition may exist; confirm
// against the full file.
1733 static void dropGroupPrivs(gid_t gid
)
1736 if (setgid(gid
) == 0) {
1737 if (setgroups(0, NULL
) < 0) {
1738 warnlog("Warning: Unable to drop supplementary gids: %s", stringerror());
1742 warnlog("Warning: Unable to set group ID to %d: %s", gid
, stringerror());
// dropUserPrivs: switches the process user ID to uid with setuid(); a failure
// is only reported through warnlog.
// NOTE(review): lines are missing between the signature and the setuid call
// (embedded numbers 1748-1749), so an outer condition may exist; confirm
// against the full file.
1747 static void dropUserPrivs(uid_t uid
)
1750 if(setuid(uid
) < 0) {
1751 warnlog("Warning: Unable to set user ID to %d: %s", uid
, stringerror());
// checkFileDescriptorsLimits: estimates how many file descriptors this
// configuration can need and warns when the RLIMIT_NOFILE soft limit
// (rl.rlim_cur) is not above that estimate.
// udpBindsCount and tcpBindsCount are the numbers of listening sockets added
// to the estimate.
// NOTE(review): some source lines are missing from this listing (for example
// the declaration of rl and the increments that belong to the webserver and
// console comments). The return value of getrlimit is not checked in the
// visible code.
1756 static void checkFileDescriptorsLimits(size_t udpBindsCount
, size_t tcpBindsCount
)
1758 /* stdin, stdout, stderr */
1759 size_t requiredFDsCount
= 3;
1760 auto backends
= g_dstates
.getLocal();
1761 /* UDP sockets to backends */
1762 size_t backendUDPSocketsCount
= 0;
1763 for (const auto& backend
: *backends
) {
1764 backendUDPSocketsCount
+= backend
->sockets
.size();
1766 requiredFDsCount
+= backendUDPSocketsCount
;
1767 /* TCP sockets to backends */
1768 requiredFDsCount
+= (backends
->size() * g_maxTCPClientThreads
);
1769 /* listening sockets */
1770 requiredFDsCount
+= udpBindsCount
;
1771 requiredFDsCount
+= tcpBindsCount
;
1772 /* max TCP connections currently served */
1773 requiredFDsCount
+= g_maxTCPClientThreads
;
1774 /* max pipes for communicating between TCP acceptors and client threads */
1775 requiredFDsCount
+= (g_maxTCPClientThreads
* 2);
1776 /* max TCP queued connections */
1777 requiredFDsCount
+= g_maxTCPQueuedConnections
;
1778 /* DelayPipe pipe */
1779 requiredFDsCount
+= 2;
1782 /* webserver main socket */
1784 /* console main socket */
1791 getrlimit(RLIMIT_NOFILE
, &rl
);
1792 if (rl
.rlim_cur
<= requiredFDsCount
) {
// NOTE(review): the %d placeholders receive std::string values built with
// std::to_string; fine only if warnlog ignores the conversion type - confirm
1793 warnlog("Warning, this configuration can use more than %d file descriptors, web server and console connections not included, and the current limit is %d.", std::to_string(requiredFDsCount
), std::to_string(rl
.rlim_cur
));
1795 warnlog("You can increase this value by using LimitNOFILE= in the systemd unit file or ulimit.");
1797 warnlog("You can increase this value by using ulimit.");
// setUpLocalBind: creates, configures and binds the listening socket of one
// frontend (cs), UDP or TCP depending on cs->tcp.
// Visible steps: socket creation, SO_REUSEADDR, TCP_DEFER_ACCEPT and
// TCP_FASTOPEN, IPV6_V6ONLY for IPv6, bindAny, packet info options for UDP
// sockets bound to an any address, SO_REUSEPORT, ignoring PMTU on plain IPv4
// UDP sockets, SO_BINDTODEVICE, the default BPF filter, TLS and DoH setup,
// then SBind, SListen on tcpFD and a log line naming the protocol.
// A TLS setup failure logs an error and exits the process.
// NOTE(review): many preprocessor and brace lines are missing from this
// listing, so which calls are conditional on TCP or on build options is only
// partly visible.
1802 static void setUpLocalBind(std::unique_ptr
<ClientState
>& cs
)
1804 /* skip some warnings if there is an identical UDP context */
1805 bool warn
= cs
->tcp
== false || cs
->tlsFrontend
!= nullptr || cs
->dohFrontend
!= nullptr;
// fd aliases udpFD or tcpFD of the frontend, so the assignment below stores
// the new socket there
1806 int& fd
= cs
->tcp
== false ? cs
->udpFD
: cs
->tcpFD
;
1809 fd
= SSocket(cs
->local
.sin4
.sin_family
, cs
->tcp
== false ? SOCK_DGRAM
: SOCK_STREAM
, 0);
1812 SSetsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, 1);
1813 #ifdef TCP_DEFER_ACCEPT
1814 SSetsockopt(fd
, IPPROTO_TCP
, TCP_DEFER_ACCEPT
, 1);
1816 if (cs
->fastOpenQueueSize
> 0) {
1818 SSetsockopt(fd
, IPPROTO_TCP
, TCP_FASTOPEN
, cs
->fastOpenQueueSize
);
1821 warnlog("TCP Fast Open has been configured on local address '%s' but is not supported", cs
->local
.toStringWithPort());
1827 if(cs
->local
.sin4
.sin_family
== AF_INET6
) {
1828 SSetsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, 1);
1831 bindAny(cs
->local
.sin4
.sin_family
, fd
);
// UDP socket bound to an any address: ask for packet info on received
// datagrams; the results of these two setsockopt calls are ignored
1833 if(!cs
->tcp
&& IsAnyAddress(cs
->local
)) {
1835 setsockopt(fd
, IPPROTO_IP
, GEN_IP_PKTINFO
, &one
, sizeof(one
)); // linux supports this, so why not - might fail on other systems
1836 #ifdef IPV6_RECVPKTINFO
1837 setsockopt(fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, &one
, sizeof(one
));
1841 if (cs
->reuseport
) {
1843 SSetsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, 1);
1846 /* no need to warn again if configured but support is not available, we already did for UDP */
1847 warnlog("SO_REUSEPORT has been configured on local address '%s' but is not supported", cs
->local
.toStringWithPort());
1852 /* Only set this on IPv4 UDP sockets.
1853 Don't set it for DNSCrypt binds. DNSCrypt pads queries for privacy
1854 purposes, so we do receive large, sometimes fragmented datagrams. */
1855 if (!cs
->tcp
&& cs
->local
.isIPv4() && !cs
->dnscryptCtx
) {
1857 setSocketIgnorePMTU(cs
->udpFD
);
1859 catch(const std::exception
& e
) {
1860 warnlog("Failed to set IP_MTU_DISCOVER on UDP server socket for local address '%s': %s", cs
->local
.toStringWithPort(), e
.what());
1864 const std::string
& itf
= cs
->interface
;
1866 #ifdef SO_BINDTODEVICE
1867 int res
= setsockopt(fd
, SOL_SOCKET
, SO_BINDTODEVICE
, itf
.c_str(), itf
.length());
1869 warnlog("Error setting up the interface on local address '%s': %s", cs
->local
.toStringWithPort(), stringerror());
1873 warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", cs
->local
.toStringWithPort());
1879 if (g_defaultBPFFilter
) {
1880 cs
->attachFilter(g_defaultBPFFilter
);
1881 vinfolog("Attaching default BPF Filter to %s frontend %s", (!cs
->tcp
? "UDP" : "TCP"), cs
->local
.toStringWithPort());
1883 #endif /* HAVE_EBPF */
// a TLS setup failure is fatal: it is logged and the process exits
1885 if (cs
->tlsFrontend
!= nullptr) {
1886 if (!cs
->tlsFrontend
->setupTLS()) {
1887 errlog("Error while setting up TLS on local address '%s', exiting", cs
->local
.toStringWithPort());
1888 _exit(EXIT_FAILURE
);
1892 if (cs
->dohFrontend
!= nullptr) {
1893 cs
->dohFrontend
->setup();
1896 SBind(fd
, cs
->local
);
// NOTE(review): the condition guarding SListen is not visible in this
// listing; it is called on tcpFD
1899 SListen(cs
->tcpFD
, cs
->tcpListenQueueSize
);
1901 if (cs
->tlsFrontend
!= nullptr) {
1902 warnlog("Listening on %s for TLS", cs
->local
.toStringWithPort());
1904 else if (cs
->dohFrontend
!= nullptr) {
1905 warnlog("Listening on %s for DoH", cs
->local
.toStringWithPort());
1907 else if (cs
->dnscryptCtx
!= nullptr) {
1908 warnlog("Listening on %s for DNSCrypt", cs
->local
.toStringWithPort());
1911 warnlog("Listening on %s", cs
->local
.toStringWithPort());
// Command-line state, filled in by main while parsing the options.
// NOTE(review): the enclosing declaration is missing from this listing; main
// reaches these members through g_cmdLine, together with further members
// (config, command, uid, gid) whose declarations are not visible here.
// locals: addresses given with -l/--local
1920 vector
<string
> locals
;
// remotes: remaining command-line arguments when not running as a client
1921 vector
<string
> remotes
;
// checkConfig, beClient and beSupervised are set to true by main when the
// matching option is seen
1922 bool checkConfig
{false};
1923 bool beClient
{false};
1924 bool beSupervised
{false};
// set to true by main right before the frontends are bound
1931 std::atomic
<bool> g_configurationDone
{false};
// Prints the command-line syntax and a description of each option to cout.
// The -k/--setkey lines sit after a HAVE_LIBSODIUM ifdef.
// NOTE(review): the signature of the enclosing function is missing from this
// listing; presumably this is the body of the help routine behind -h/--help.
1936 cout
<<"Syntax: dnsdist [-C,--config file] [-c,--client [IP[:PORT]]]\n";
1937 cout
<<"[-e,--execute cmd] [-h,--help] [-l,--local addr]\n";
1938 cout
<<"[-v,--verbose] [--check-config] [--version]\n";
1940 cout
<<"-a,--acl netmask Add this netmask to the ACL\n";
1941 cout
<<"-C,--config file Load configuration from 'file'\n";
1942 cout
<<"-c,--client Operate as a client, connect to dnsdist. This reads\n";
1943 cout
<<" controlSocket from your configuration file, but also\n";
1944 cout
<<" accepts an IP:PORT argument\n";
1945 #ifdef HAVE_LIBSODIUM
1946 cout
<<"-k,--setkey KEY Use KEY for encrypted communication to dnsdist. This\n";
1947 cout
<<" is similar to setting setKey in the configuration file.\n";
1948 cout
<<" NOTE: this will leak this key in your shell's history\n";
1949 cout
<<" and in the systems running process list.\n";
1951 cout
<<"--check-config Validate the configuration file and exit. The exit-code\n";
1952 cout
<<" reflects the validation, 0 is OK, 1 means an error.\n";
1953 cout
<<" Any errors are printed as well.\n";
1954 cout
<<"-e,--execute cmd Connect to dnsdist and execute 'cmd'\n";
1955 cout
<<"-g,--gid gid Change the process group ID after binding sockets\n";
1956 cout
<<"-h,--help Display this helpful message\n";
1957 cout
<<"-l,--local address Listen on this local address\n";
1958 cout
<<"--supervised Don't open a console, I'm supervised\n";
1959 cout
<<" (use with e.g. systemd and daemontools)\n";
1960 cout
<<"--disable-syslog Don't log to syslog, only to stdout\n";
1961 cout
<<" (use with e.g. systemd)\n";
1962 cout
<<"-u,--uid uid Change the process user ID after binding sockets\n";
1963 cout
<<"-v,--verbose Enable verbose mode\n";
1964 cout
<<"-V,--version Show dnsdist version information and exit\n";
1967 int main(int argc
, char** argv
)
1970 size_t udpBindsCount
= 0;
1971 size_t tcpBindsCount
= 0;
1972 rl_attempted_completion_function
= my_completion
;
1973 rl_completion_append_character
= 0;
1975 signal(SIGPIPE
, SIG_IGN
);
1976 signal(SIGCHLD
, SIG_IGN
);
1977 openlog("dnsdist", LOG_PID
|LOG_NDELAY
, LOG_DAEMON
);
1979 #ifdef HAVE_LIBSODIUM
1980 if (sodium_init() == -1) {
1981 cerr
<<"Unable to initialize crypto library"<<endl
;
1984 g_hashperturb
=randombytes_uniform(0xffffffff);
1985 srandom(randombytes_uniform(0xffffffff));
1989 gettimeofday(&tv
, 0);
1990 srandom(tv
.tv_sec
^ tv
.tv_usec
^ getpid());
1991 g_hashperturb
=random();
1995 ComboAddress clientAddress
= ComboAddress();
1996 g_cmdLine
.config
=SYSCONFDIR
"/dnsdist.conf";
1997 struct option longopts
[]={
1998 {"acl", required_argument
, 0, 'a'},
1999 {"check-config", no_argument
, 0, 1},
2000 {"client", no_argument
, 0, 'c'},
2001 {"config", required_argument
, 0, 'C'},
2002 {"disable-syslog", no_argument
, 0, 2},
2003 {"execute", required_argument
, 0, 'e'},
2004 {"gid", required_argument
, 0, 'g'},
2005 {"help", no_argument
, 0, 'h'},
2006 {"local", required_argument
, 0, 'l'},
2007 {"setkey", required_argument
, 0, 'k'},
2008 {"supervised", no_argument
, 0, 3},
2009 {"uid", required_argument
, 0, 'u'},
2010 {"verbose", no_argument
, 0, 'v'},
2011 {"version", no_argument
, 0, 'V'},
2017 int c
=getopt_long(argc
, argv
, "a:cC:e:g:hk:l:u:vV", longopts
, &longindex
);
2022 g_cmdLine
.checkConfig
=true;
2028 g_cmdLine
.beSupervised
=true;
2031 g_cmdLine
.config
=optarg
;
2034 g_cmdLine
.beClient
=true;
2037 g_cmdLine
.command
=optarg
;
2040 g_cmdLine
.gid
=optarg
;
2043 cout
<<"dnsdist "<<VERSION
<<endl
;
2050 g_ACL
.modify([optstring
](NetmaskGroup
& nmg
) { nmg
.addMask(optstring
); });
2053 #ifdef HAVE_LIBSODIUM
2054 if (B64Decode(string(optarg
), g_consoleKey
) < 0) {
2055 cerr
<<"Unable to decode key '"<<optarg
<<"'."<<endl
;
2059 cerr
<<"dnsdist has been built without libsodium, -k/--setkey is unsupported."<<endl
;
2064 g_cmdLine
.locals
.push_back(trim_copy(string(optarg
)));
2067 g_cmdLine
.uid
=optarg
;
2073 #ifdef LUAJIT_VERSION
2074 cout
<<"dnsdist "<<VERSION
<<" ("<<LUA_RELEASE
<<" ["<<LUAJIT_VERSION
<<"])"<<endl
;
2076 cout
<<"dnsdist "<<VERSION
<<" ("<<LUA_RELEASE
<<")"<<endl
;
2078 cout
<<"Enabled features: ";
2082 #ifdef HAVE_DNS_OVER_TLS
2083 cout
<<"dns-over-tls(";
2095 #ifdef HAVE_DNS_OVER_HTTPS
2096 cout
<<"dns-over-https(DOH) ";
2098 #ifdef HAVE_DNSCRYPT
2107 #ifdef HAVE_LIBCRYPTO
2110 #ifdef HAVE_LIBSODIUM
2116 #ifdef HAVE_PROTOBUF
2122 #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
2123 cout
<<"recvmmsg/sendmmsg ";
2125 #ifdef HAVE_NET_SNMP
2135 //getopt_long printed an error message.
2144 for(auto p
= argv
; *p
; ++p
) {
2145 if(g_cmdLine
.beClient
) {
2146 clientAddress
= ComboAddress(*p
, 5199);
2148 g_cmdLine
.remotes
.push_back(*p
);
2152 ServerPolicy leastOutstandingPol
{"leastOutstanding", leastOutstanding
, false};
2154 g_policy
.setState(leastOutstandingPol
);
2155 if(g_cmdLine
.beClient
|| !g_cmdLine
.command
.empty()) {
2156 setupLua(true, false, g_cmdLine
.config
);
2157 if (clientAddress
!= ComboAddress())
2158 g_serverControl
= clientAddress
;
2159 doClient(g_serverControl
, g_cmdLine
.command
);
2160 _exit(EXIT_SUCCESS
);
2163 auto acl
= g_ACL
.getCopy();
2165 for(auto& addr
: {"127.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "169.254.0.0/16", "192.168.0.0/16", "172.16.0.0/12", "::1/128", "fc00::/7", "fe80::/10"})
2167 g_ACL
.setState(acl
);
2170 auto consoleACL
= g_consoleACL
.getCopy();
2171 for (const auto& mask
: { "127.0.0.1/8", "::1/128" }) {
2172 consoleACL
.addMask(mask
);
2174 g_consoleACL
.setState(consoleACL
);
2176 if (g_cmdLine
.checkConfig
) {
2177 setupLua(false, true, g_cmdLine
.config
);
2178 // No exception was thrown
2179 infolog("Configuration '%s' OK!", g_cmdLine
.config
);
2180 _exit(EXIT_SUCCESS
);
2183 auto todo
=setupLua(false, false, g_cmdLine
.config
);
2185 auto localPools
= g_pools
.getCopy();
2187 bool precompute
= false;
2188 if (g_policy
.getLocal()->name
== "chashed") {
2191 for (const auto& entry
: localPools
) {
2192 if (entry
.second
->policy
!= nullptr && entry
.second
->policy
->name
== "chashed") {
2199 vinfolog("Pre-computing hashes for consistent hash load-balancing policy");
2200 // pre compute hashes
2201 auto backends
= g_dstates
.getLocal();
2202 for (auto& backend
: *backends
) {
2203 if (backend
->weight
< 100) {
2204 vinfolog("Warning, the backend '%s' has a very low weight (%d), which will not yield a good distribution of queries with the 'chashed' policy. Please consider raising it to at least '100'.", backend
->getName(), backend
->weight
);
2212 if (!g_cmdLine
.locals
.empty()) {
2213 for (auto it
= g_frontends
.begin(); it
!= g_frontends
.end(); ) {
2214 /* DoH, DoT and DNSCrypt frontends are separate */
2215 if ((*it
)->dohFrontend
== nullptr && (*it
)->tlsFrontend
== nullptr && (*it
)->dnscryptCtx
== nullptr) {
2216 it
= g_frontends
.erase(it
);
2223 for(const auto& loc
: g_cmdLine
.locals
) {
2225 g_frontends
.push_back(std::unique_ptr
<ClientState
>(new ClientState(ComboAddress(loc
, 53), false, false, 0, "", {})));
2227 g_frontends
.push_back(std::unique_ptr
<ClientState
>(new ClientState(ComboAddress(loc
, 53), true, false, 0, "", {})));
2231 if (g_frontends
.empty()) {
2233 g_frontends
.push_back(std::unique_ptr
<ClientState
>(new ClientState(ComboAddress("127.0.0.1", 53), false, false, 0, "", {})));
2235 g_frontends
.push_back(std::unique_ptr
<ClientState
>(new ClientState(ComboAddress("127.0.0.1", 53), true, false, 0, "", {})));
2238 g_configurationDone
= true;
2240 for(auto& frontend
: g_frontends
) {
2241 setUpLocalBind(frontend
);
2243 if (frontend
->tcp
== false) {
2251 warnlog("dnsdist %s comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it according to the terms of the GPL version 2", VERSION
);
2255 g_ACL
.getLocal()->toStringVector(&vec
);
2256 for(const auto& s
: vec
) {
2261 infolog("ACL allowing queries from: %s", acls
.c_str());
2264 g_consoleACL
.getLocal()->toStringVector(&vec
);
2265 for (const auto& entry
: vec
) {
2266 if (!acls
.empty()) {
2271 infolog("Console ACL allowing connections from: %s", acls
.c_str());
2273 #ifdef HAVE_LIBSODIUM
2274 if (g_consoleEnabled
&& g_consoleKey
.empty()) {
2275 warnlog("Warning, the console has been enabled via 'controlSocket()' but no key has been set with 'setKey()' so all connections will fail until a key has been set");
2279 uid_t newgid
=getegid();
2280 gid_t newuid
=geteuid();
2282 if(!g_cmdLine
.gid
.empty())
2283 newgid
= strToGID(g_cmdLine
.gid
.c_str());
2285 if(!g_cmdLine
.uid
.empty())
2286 newuid
= strToUID(g_cmdLine
.uid
.c_str());
2288 if (getegid() != newgid
) {
2289 if (running_in_service_mgr()) {
2290 errlog("--gid/-g set on command-line, but dnsdist was started as a systemd service. Use the 'Group' setting in the systemd unit file to set the group to run as");
2291 _exit(EXIT_FAILURE
);
2293 dropGroupPrivs(newgid
);
2296 if (geteuid() != newuid
) {
2297 if (running_in_service_mgr()) {
2298 errlog("--uid/-u set on command-line, but dnsdist was started as a systemd service. Use the 'User' setting in the systemd unit file to set the user to run as");
2299 _exit(EXIT_FAILURE
);
2301 dropUserPrivs(newuid
);
2305 /* we might still have capabilities remaining,
2306 for example if we have been started as root
2307 without --uid or --gid (please don't do that)
2308 or as an unprivileged user with ambient
2309 capabilities like CAP_NET_BIND_SERVICE.
2311 dropCapabilities(g_capabilitiesToRetain
);
2313 catch(const std::exception
& e
) {
2314 warnlog("%s", e
.what());
2317 /* this needs to be done _after_ dropping privileges */
2318 g_delay
= new DelayPipe
<DelayedPacket
>();
2324 g_tcpclientthreads
= std::unique_ptr
<TCPClientCollection
>(new TCPClientCollection(g_maxTCPClientThreads
, g_useTCPSinglePipe
));
2329 localPools
= g_pools
.getCopy();
2330 /* create the default pool no matter what */
2331 createPoolIfNotExists(localPools
, "");
2332 if(g_cmdLine
.remotes
.size()) {
2333 for(const auto& address
: g_cmdLine
.remotes
) {
2334 auto ret
=std::make_shared
<DownstreamState
>(ComboAddress(address
, 53));
2335 addServerToPool(localPools
, "", ret
);
2336 if (ret
->connected
&& !ret
->threadStarted
.test_and_set()) {
2337 ret
->tid
= thread(responderThread
, ret
);
2339 g_dstates
.modify([ret
](servers_t
& servers
) { servers
.push_back(ret
); });
2342 g_pools
.setState(localPools
);
2344 if(g_dstates
.getLocal()->empty()) {
2345 errlog("No downstream servers defined: all packets will get dropped");
2346 // you might define them later, but you need to know
2349 checkFileDescriptorsLimits(udpBindsCount
, tcpBindsCount
);
2351 auto mplexer
= std::shared_ptr
<FDMultiplexer
>(FDMultiplexer::getMultiplexerSilent());
2352 for(auto& dss
: g_dstates
.getCopy()) { // it is a copy, but the internal shared_ptrs are the real deal
2353 if (dss
->availability
== DownstreamState::Availability::Auto
) {
2354 if (!queueHealthCheck(mplexer
, dss
, true)) {
2355 dss
->upStatus
= false;
2356 warnlog("Marking downstream %s as 'down'", dss
->getNameWithAddr());
2360 handleQueuedHealthChecks(mplexer
, true);
2362 for(auto& cs
: g_frontends
) {
2363 if (cs
->dohFrontend
!= nullptr) {
2364 #ifdef HAVE_DNS_OVER_HTTPS
2365 std::thread
t1(dohThread
, cs
.get());
2366 if (!cs
->cpus
.empty()) {
2367 mapThreadToCPUList(t1
.native_handle(), cs
->cpus
);
2370 #endif /* HAVE_DNS_OVER_HTTPS */
2373 if (cs
->udpFD
>= 0) {
2374 thread
t1(udpClientThread
, cs
.get());
2375 if (!cs
->cpus
.empty()) {
2376 mapThreadToCPUList(t1
.native_handle(), cs
->cpus
);
2380 else if (cs
->tcpFD
>= 0) {
2381 thread
t1(tcpAcceptorThread
, cs
.get());
2382 if (!cs
->cpus
.empty()) {
2383 mapThreadToCPUList(t1
.native_handle(), cs
->cpus
);
2389 thread
carbonthread(carbonDumpThread
);
2390 carbonthread
.detach();
2392 thread
stattid(maintThread
);
2395 thread
healththread(healthChecksThread
);
2397 if (!g_secPollSuffix
.empty()) {
2398 thread
secpollthread(secPollThread
);
2399 secpollthread
.detach();
2402 if(g_cmdLine
.beSupervised
) {
2404 sd_notify(0, "READY=1");
2406 healththread
.join();
2409 healththread
.detach();
2412 _exit(EXIT_SUCCESS
);
2415 catch(const LuaContext::ExecutionErrorException
& e
) {
2417 errlog("Fatal Lua error: %s", e
.what());
2418 std::rethrow_if_nested(e
);
2419 } catch(const std::exception
& ne
) {
2420 errlog("Details: %s", ne
.what());
2422 catch(PDNSException
&ae
)
2424 errlog("Fatal pdns error: %s", ae
.reason
);
2426 _exit(EXIT_FAILURE
);
2428 catch(std::exception
&e
)
2430 errlog("Fatal error: %s", e
.what());
2431 _exit(EXIT_FAILURE
);
2433 catch(PDNSException
&ae
)
2435 errlog("Fatal pdns error: %s", ae
.reason
);
2436 _exit(EXIT_FAILURE
);
2439 uint64_t getLatencyCount(const std::string
&)
2441 return g_stats
.responses
+ g_stats
.selfAnswered
+ g_stats
.cacheHits
;