]>
Commit | Line | Data |
---|---|---|
24d5cb00 | 1 | /* |
12471842 PL |
2 | * This file is part of PowerDNS or dnsdist. |
3 | * Copyright -- PowerDNS.COM B.V. and its contributors | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of version 2 of the GNU General Public License as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * In addition, for the avoidance of any doubt, permission is granted to | |
10 | * link this program with OpenSSL and to (re)distribute the binaries | |
11 | * produced as the result of such linking. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
5cc8371b RG |
22 | |
23 | #include "config.h" | |
24 | ||
25 | #include <fstream> | |
26 | #include <getopt.h> | |
27 | #include <grp.h> | |
2c5db3f7 | 28 | #include <limits> |
5cc8371b RG |
29 | #include <netinet/tcp.h> |
30 | #include <pwd.h> | |
31 | #include <sys/resource.h> | |
32 | #include <unistd.h> | |
424bdfb1 | 33 | |
4d39d7f3 | 34 | #if defined (__OpenBSD__) || defined(__NetBSD__) |
424bdfb1 PD |
35 | #include <readline/readline.h> |
36 | #else | |
37 | #include <editline/readline.h> | |
38 | #endif | |
39 | ||
6ab65223 PL |
40 | #ifdef HAVE_SYSTEMD |
41 | #include <systemd/sd-daemon.h> | |
42 | #endif | |
43 | ||
5cc8371b RG |
44 | #include "dnsdist.hh" |
45 | #include "dnsdist-cache.hh" | |
b5521206 | 46 | #include "dnsdist-console.hh" |
5cc8371b | 47 | #include "dnsdist-ecs.hh" |
dd9c8246 | 48 | #include "dnsdist-healthchecks.hh" |
5cc8371b | 49 | #include "dnsdist-lua.hh" |
03b00917 | 50 | #include "dnsdist-rings.hh" |
5d4e1ef8 | 51 | #include "dnsdist-secpoll.hh" |
53c57da7 | 52 | #include "dnsdist-xpf.hh" |
5cc8371b RG |
53 | |
54 | #include "base64.hh" | |
55 | #include "delaypipe.hh" | |
56 | #include "dolog.hh" | |
57 | #include "dnsname.hh" | |
e0fd37ec | 58 | #include "dnsparser.hh" |
5cc8371b RG |
59 | #include "ednsoptions.hh" |
60 | #include "gettime.hh" | |
61 | #include "lock.hh" | |
62 | #include "misc.hh" | |
63 | #include "sodcrypto.hh" | |
64 | #include "sstuff.hh" | |
519f5484 | 65 | #include "threadname.hh" |
5cc8371b | 66 | |
a40df301 | 67 | /* Known sins: |
e48090d1 | 68 | |
d12cea01 | 69 | Receiver is currently single threaded |
e5a14b2b | 70 | not *that* bad actually, but now that we are thread safe, might want to scale |
a40df301 | 71 | */ |
24d5cb00 | 72 | |
0940e4eb | 73 | /* the Rulaction plan |
74 | Set of Rules, if one matches, it leads to an Action | |
75 | Both rules and actions could conceivably be Lua based. | |
76 | On the C++ side, both could be inherited from a class Rule and a class Action, | |
77 | on the Lua side we can't do that. */ | |
78 | ||
64e4ebb4 | 79 | using std::atomic; |
80 | using std::thread; | |
7730131a | 81 | bool g_verbose; |
b065f701 | 82 | |
e48090d1 | 83 | struct DNSDistStats g_stats; |
37a5c2d5 PO |
84 | MetricDefinitionStorage g_metricDefinitions; |
85 | ||
3b203c83 | 86 | uint16_t g_maxOutstanding{std::numeric_limits<uint16_t>::max()}; |
1ea747c0 | 87 | uint32_t g_staleCacheEntriesTTL{0}; |
bbfaaa6f | 88 | bool g_syslog{true}; |
0dffe9e3 | 89 | bool g_allowEmptyResponse{false}; |
cffde2fd | 90 | |
91 | GlobalStateHolder<NetmaskGroup> g_ACL; | |
c9262563 | 92 | string g_outputBuffer; |
a227f47d | 93 | |
a227f47d | 94 | std::vector<std::shared_ptr<TLSFrontend>> g_tlslocals; |
fbf14b03 | 95 | std::vector<std::shared_ptr<DOHFrontend>> g_dohlocals; |
6e9fd124 | 96 | std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals; |
87b515ed RG |
97 | #ifdef HAVE_EBPF |
98 | shared_ptr<BPFFilter> g_defaultBPFFilter; | |
8429ad04 | 99 | std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters; |
87b515ed | 100 | #endif /* HAVE_EBPF */ |
6e9fd124 | 101 | std::vector<std::unique_ptr<ClientState>> g_frontends; |
886e2cf2 | 102 | GlobalStateHolder<pools_t> g_pools; |
0beaa5c8 | 103 | size_t g_udpVectorSize{1}; |
7c0860e1 | 104 | |
9f4eb5cc RG |
105 | bool g_snmpEnabled{false}; |
106 | bool g_snmpTrapsEnabled{false}; | |
107 | DNSDistSNMPAgent* g_snmpAgent{nullptr}; | |
108 | ||
726ddf60 | 109 | /* UDP: the grand design. Per socket we listen on for incoming queries there is one thread. |
7730131a | 110 | Then we have a bunch of connected sockets for talking to downstream servers. |
111 | We send directly to those sockets. | |
24d5cb00 | 112 | |
7730131a | 113 | For the return path, per downstream server we have a thread that listens to responses. |
24d5cb00 | 114 | |
7730131a | 115 | Per socket there is an array of 2^16 states, when we send out a packet downstream, we note |
116 | there the original requestor and the original id. The new ID is the offset in the array. | |
117 | ||
118 | When an answer comes in on a socket, we look up the offset by the id, and lob it to the | |
119 | original requestor. | |
120 | ||
121 | IDs are assigned by atomic increments of the socket offset. | |
122 | */ | |
123 | ||
4d5959e6 RG |
124 | GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions; |
125 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions; | |
126 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions; | |
2d4783a8 | 127 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions; |
4d5959e6 | 128 | |
df111b53 | 129 | Rings g_rings; |
786e4d8c | 130 | QueryCount g_qcount; |
7c0860e1 | 131 | |
ecbe9133 | 132 | GlobalStateHolder<servers_t> g_dstates; |
78ffa782 | 133 | GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG; |
71c94675 | 134 | GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT; |
dd46e5e3 | 135 | DNSAction::Action g_dynBlockAction = DNSAction::Action::Drop; |
3f6d07a4 RG |
136 | int g_tcpRecvTimeout{2}; |
137 | int g_tcpSendTimeout{2}; | |
e0b5e49d | 138 | int g_udpTimeout{2}; |
3f6d07a4 | 139 | |
26a3cdb7 | 140 | bool g_servFailOnNoPolicy{false}; |
e72fbfc4 | 141 | bool g_truncateTC{false}; |
53c57da7 RG |
142 | bool g_fixupCase{false}; |
143 | bool g_preserveTrailingData{false}; | |
32b86928 | 144 | bool g_roundrobinFailOnNoServer{false}; |
0beaa5c8 | 145 | |
83fe2c55 RG |
146 | std::set<std::string> g_capabilitiesToRetain; |
147 | ||
e0fd37ec | 148 | static void truncateTC(char* packet, uint16_t* len, size_t responseSize, unsigned int consumed) |
6ad8b29a | 149 | try |
150 | { | |
e0fd37ec RG |
151 | bool hadEDNS = false; |
152 | uint16_t payloadSize = 0; | |
153 | uint16_t z = 0; | |
154 | ||
155 | if (g_addEDNSToSelfGeneratedResponses) { | |
156 | hadEDNS = getEDNSUDPPayloadSizeAndZ(packet, *len, &payloadSize, &z); | |
157 | } | |
158 | ||
2aa2c0aa | 159 | *len=static_cast<uint16_t>(sizeof(dnsheader)+consumed+DNS_TYPE_SIZE+DNS_CLASS_SIZE); |
e7c732b8 RG |
160 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet); |
161 | dh->ancount = dh->arcount = dh->nscount = 0; | |
e0fd37ec RG |
162 | |
163 | if (hadEDNS) { | |
5e98ccfa | 164 | addEDNS(dh, *len, responseSize, z & EDNS_HEADER_FLAG_DO, payloadSize, 0); |
e0fd37ec | 165 | } |
6ad8b29a | 166 | } |
167 | catch(...) | |
168 | { | |
169 | g_stats.truncFail++; | |
170 | } | |
171 | ||
7b3865cd | 172 | struct DelayedPacket |
173 | { | |
174 | int fd; | |
175 | string packet; | |
176 | ComboAddress destination; | |
68f3eb4d | 177 | ComboAddress origDest; |
7b3865cd | 178 | void operator()() |
179 | { | |
20b019fa RG |
180 | ssize_t res; |
181 | if(origDest.sin4.sin_family == 0) { | |
182 | res = sendto(fd, packet.c_str(), packet.size(), 0, (struct sockaddr*)&destination, destination.getSocklen()); | |
183 | } | |
184 | else { | |
185 | res = sendfromto(fd, packet.c_str(), packet.size(), 0, origDest, destination); | |
186 | } | |
187 | if (res == -1) { | |
188 | int err = errno; | |
189 | vinfolog("Error sending delayed response to %s: %s", destination.toStringWithPort(), strerror(err)); | |
190 | } | |
7b3865cd | 191 | } |
192 | }; | |
193 | ||
3e425868 | 194 | DelayPipe<DelayedPacket>* g_delay = nullptr; |
7b3865cd | 195 | |
f653b8df | 196 | void doLatencyStats(double udiff) |
daacd477 | 197 | { |
cb167afd CHB |
198 | if(udiff < 1000) ++g_stats.latency0_1; |
199 | else if(udiff < 10000) ++g_stats.latency1_10; | |
200 | else if(udiff < 50000) ++g_stats.latency10_50; | |
201 | else if(udiff < 100000) ++g_stats.latency50_100; | |
202 | else if(udiff < 1000000) ++g_stats.latency100_1000; | |
203 | else ++g_stats.latencySlow; | |
eb0335ff | 204 | g_stats.latencySum += udiff / 1000; |
be6c318f | 205 | |
daacd477 | 206 | auto doAvg = [](double& var, double n, double weight) { |
207 | var = (weight -1) * var/weight + n/weight; | |
208 | }; | |
209 | ||
210 | doAvg(g_stats.latencyAvg100, udiff, 100); | |
211 | doAvg(g_stats.latencyAvg1000, udiff, 1000); | |
212 | doAvg(g_stats.latencyAvg10000, udiff, 10000); | |
213 | doAvg(g_stats.latencyAvg1000000, udiff, 1000000); | |
214 | } | |
ca404e94 | 215 | |
e7c732b8 | 216 | bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed) |
fcffc585 | 217 | { |
fcffc585 RG |
218 | if (responseLen < sizeof(dnsheader)) { |
219 | return false; | |
220 | } | |
221 | ||
3e425868 | 222 | const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(response); |
c8c3d4e4 | 223 | if (dh->qdcount == 0) { |
ad7ec9a2 | 224 | if ((dh->rcode != RCode::NoError && dh->rcode != RCode::NXDomain) || g_allowEmptyResponse) { |
c8c3d4e4 RG |
225 | return true; |
226 | } | |
227 | else { | |
cb167afd | 228 | ++g_stats.nonCompliantResponses; |
c8c3d4e4 RG |
229 | return false; |
230 | } | |
231 | } | |
232 | ||
3e425868 RG |
233 | uint16_t rqtype, rqclass; |
234 | DNSName rqname; | |
fcffc585 RG |
235 | try { |
236 | rqname=DNSName(response, responseLen, sizeof(dnsheader), false, &rqtype, &rqclass, &consumed); | |
237 | } | |
3e425868 RG |
238 | catch(const std::exception& e) { |
239 | if(responseLen > 0 && static_cast<size_t>(responseLen) > sizeof(dnsheader)) { | |
fcffc585 | 240 | infolog("Backend %s sent us a response with id %d that did not parse: %s", remote.toStringWithPort(), ntohs(dh->id), e.what()); |
3e425868 | 241 | } |
cb167afd | 242 | ++g_stats.nonCompliantResponses; |
fcffc585 RG |
243 | return false; |
244 | } | |
245 | ||
246 | if (rqtype != qtype || rqclass != qclass || rqname != qname) { | |
247 | return false; | |
248 | } | |
249 | ||
250 | return true; | |
251 | } | |
252 | ||
4ab01344 | 253 | static void restoreFlags(struct dnsheader* dh, uint16_t origFlags) |
fcffc585 RG |
254 | { |
255 | static const uint16_t rdMask = 1 << FLAGS_RD_OFFSET; | |
256 | static const uint16_t cdMask = 1 << FLAGS_CD_OFFSET; | |
257 | static const uint16_t restoreFlagsMask = UINT16_MAX & ~(rdMask | cdMask); | |
0f72fd5c RG |
258 | uint16_t * flags = getFlagsFromDNSHeader(dh); |
259 | /* clear the flags we are about to restore */ | |
260 | *flags &= restoreFlagsMask; | |
261 | /* only keep the flags we want to restore */ | |
262 | origFlags &= ~restoreFlagsMask; | |
263 | /* set the saved flags as they were */ | |
264 | *flags |= origFlags; | |
265 | } | |
266 | ||
4ab01344 | 267 | static bool fixUpQueryTurnedResponse(DNSQuestion& dq, const uint16_t origFlags) |
e7c732b8 RG |
268 | { |
269 | restoreFlags(dq.dh, origFlags); | |
270 | ||
271 | return addEDNSToQueryTurnedResponse(dq); | |
272 | } | |
273 | ||
3e425868 | 274 | static bool fixUpResponse(char** response, uint16_t* responseLen, size_t* responseSize, const DNSName& qname, uint16_t origFlags, bool ednsAdded, bool ecsAdded, std::vector<uint8_t>& rewrittenResponse, uint16_t addRoom, bool* zeroScope) |
0f72fd5c | 275 | { |
fcffc585 RG |
276 | if (*responseLen < sizeof(dnsheader)) { |
277 | return false; | |
278 | } | |
279 | ||
3e425868 | 280 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(*response); |
c8c3d4e4 RG |
281 | restoreFlags(dh, origFlags); |
282 | ||
283 | if (*responseLen == sizeof(dnsheader)) { | |
284 | return true; | |
285 | } | |
286 | ||
fcffc585 RG |
287 | if(g_fixupCase) { |
288 | string realname = qname.toDNSString(); | |
289 | if (*responseLen >= (sizeof(dnsheader) + realname.length())) { | |
290 | memcpy(*response + sizeof(dnsheader), realname.c_str(), realname.length()); | |
291 | } | |
292 | } | |
293 | ||
ff73f02b | 294 | if (ednsAdded || ecsAdded) { |
ceae225c | 295 | uint16_t optStart; |
fcffc585 RG |
296 | size_t optLen = 0; |
297 | bool last = false; | |
298 | ||
11d5d247 RG |
299 | const std::string responseStr(*response, *responseLen); |
300 | int res = locateEDNSOptRR(responseStr, &optStart, &optLen, &last); | |
fcffc585 RG |
301 | |
302 | if (res == 0) { | |
49c33a6c RG |
303 | if (zeroScope) { // this finds if an EDNS Client Subnet scope was set, and if it is 0 |
304 | size_t optContentStart = 0; | |
305 | uint16_t optContentLen = 0; | |
306 | /* we need at least 4 bytes after the option length (family: 2, source prefix-length: 1, scope prefix-length: 1) */ | |
307 | if (isEDNSOptionInOpt(responseStr, optStart, optLen, EDNSOptionCode::ECS, &optContentStart, &optContentLen) && optContentLen >= 4) { | |
308 | /* see if the EDNS Client Subnet SCOPE PREFIX-LENGTH byte in position 3 is set to 0, which is the only thing | |
309 | we care about. */ | |
310 | *zeroScope = responseStr.at(optContentStart + 3) == 0; | |
311 | } | |
40669042 | 312 | } |
9837850d | 313 | |
ff73f02b RG |
314 | if (ednsAdded) { |
315 | /* we added the entire OPT RR, | |
316 | therefore we need to remove it entirely */ | |
317 | if (last) { | |
318 | /* simply remove the last AR */ | |
319 | *responseLen -= optLen; | |
320 | uint16_t arcount = ntohs(dh->arcount); | |
321 | arcount--; | |
322 | dh->arcount = htons(arcount); | |
323 | } | |
324 | else { | |
325 | /* Removing an intermediary RR could lead to compression error */ | |
11d5d247 | 326 | if (rewriteResponseWithoutEDNS(responseStr, rewrittenResponse) == 0) { |
ff73f02b RG |
327 | *responseLen = rewrittenResponse.size(); |
328 | if (addRoom && (UINT16_MAX - *responseLen) > addRoom) { | |
329 | rewrittenResponse.reserve(*responseLen + addRoom); | |
330 | } | |
331 | *responseSize = rewrittenResponse.capacity(); | |
332 | *response = reinterpret_cast<char*>(rewrittenResponse.data()); | |
333 | } | |
334 | else { | |
335 | warnlog("Error rewriting content"); | |
336 | } | |
337 | } | |
fcffc585 RG |
338 | } |
339 | else { | |
ff73f02b RG |
340 | /* the OPT RR was already present, but without ECS, |
341 | we need to remove the ECS option if any */ | |
342 | if (last) { | |
343 | /* nothing after the OPT RR, we can simply remove the | |
344 | ECS option */ | |
345 | size_t existingOptLen = optLen; | |
11d5d247 | 346 | removeEDNSOptionFromOPT(*response + optStart, &optLen, EDNSOptionCode::ECS); |
ff73f02b | 347 | *responseLen -= (existingOptLen - optLen); |
fcffc585 RG |
348 | } |
349 | else { | |
ff73f02b | 350 | /* Removing an intermediary RR could lead to compression error */ |
11d5d247 | 351 | if (rewriteResponseWithoutEDNSOption(responseStr, EDNSOptionCode::ECS, rewrittenResponse) == 0) { |
ff73f02b RG |
352 | *responseLen = rewrittenResponse.size(); |
353 | if (addRoom && (UINT16_MAX - *responseLen) > addRoom) { | |
354 | rewrittenResponse.reserve(*responseLen + addRoom); | |
355 | } | |
356 | *responseSize = rewrittenResponse.capacity(); | |
357 | *response = reinterpret_cast<char*>(rewrittenResponse.data()); | |
358 | } | |
359 | else { | |
360 | warnlog("Error rewriting content"); | |
361 | } | |
fcffc585 RG |
362 | } |
363 | } | |
364 | } | |
365 | } | |
366 | ||
367 | return true; | |
368 | } | |
369 | ||
fcffc585 | 370 | #ifdef HAVE_DNSCRYPT |
3e425868 | 371 | static bool encryptResponse(char* response, uint16_t* responseLen, size_t responseSize, bool tcp, std::shared_ptr<DNSCryptQuery> dnsCryptQuery, dnsheader** dh, dnsheader* dhCopy) |
fcffc585 | 372 | { |
0f72fd5c RG |
373 | if (dnsCryptQuery) { |
374 | uint16_t encryptedResponseLen = 0; | |
57847d65 RG |
375 | |
376 | /* save the original header before encrypting it in place */ | |
377 | if (dh != nullptr && *dh != nullptr && dhCopy != nullptr) { | |
378 | memcpy(dhCopy, *dh, sizeof(dnsheader)); | |
379 | *dh = dhCopy; | |
380 | } | |
381 | ||
43234e76 | 382 | int res = dnsCryptQuery->encryptResponse(response, *responseLen, responseSize, tcp, &encryptedResponseLen); |
fcffc585 | 383 | if (res == 0) { |
0f72fd5c | 384 | *responseLen = encryptedResponseLen; |
fcffc585 RG |
385 | } else { |
386 | /* dropping response */ | |
387 | vinfolog("Error encrypting the response, dropping."); | |
388 | return false; | |
389 | } | |
390 | } | |
0f72fd5c RG |
391 | return true; |
392 | } | |
7129b5c4 | 393 | #endif /* HAVE_DNSCRYPT */ |
fcffc585 | 394 | |
3e425868 RG |
395 | static bool applyRulesToResponse(LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr) |
396 | { | |
397 | DNSResponseAction::Action action=DNSResponseAction::Action::None; | |
398 | std::string ruleresult; | |
399 | for(const auto& lr : *localRespRulactions) { | |
400 | if(lr.d_rule->matches(&dr)) { | |
401 | lr.d_rule->d_matches++; | |
402 | action=(*lr.d_action)(&dr, &ruleresult); | |
403 | switch(action) { | |
404 | case DNSResponseAction::Action::Allow: | |
405 | return true; | |
406 | break; | |
407 | case DNSResponseAction::Action::Drop: | |
408 | return false; | |
409 | break; | |
410 | case DNSResponseAction::Action::HeaderModify: | |
411 | return true; | |
412 | break; | |
413 | case DNSResponseAction::Action::ServFail: | |
414 | dr.dh->rcode = RCode::ServFail; | |
415 | return true; | |
416 | break; | |
417 | /* non-terminal actions follow */ | |
418 | case DNSResponseAction::Action::Delay: | |
419 | dr.delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry | |
420 | break; | |
421 | case DNSResponseAction::Action::None: | |
422 | break; | |
423 | } | |
424 | } | |
425 | } | |
426 | ||
427 | return true; | |
428 | } | |
429 | ||
430 | bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted) | |
431 | { | |
432 | if (!applyRulesToResponse(localRespRulactions, dr)) { | |
433 | return false; | |
434 | } | |
435 | ||
436 | bool zeroScope = false; | |
437 | if (!fixUpResponse(response, responseLen, responseSize, *dr.qname, dr.origFlags, dr.ednsAdded, dr.ecsAdded, rewrittenResponse, addRoom, dr.useZeroScope ? &zeroScope : nullptr)) { | |
438 | return false; | |
439 | } | |
440 | ||
01e23732 | 441 | if (dr.packetCache && !dr.skipCache && *responseLen <= s_maxPacketCacheEntrySize) { |
3e425868 RG |
442 | if (!dr.useZeroScope) { |
443 | /* if the query was not suitable for zero-scope, for | |
444 | example because it had an existing ECS entry so the hash is | |
445 | not really 'no ECS', so just insert it for the existing subnet | |
446 | since: | |
447 | - we don't have the correct hash for a non-ECS query | |
448 | - inserting with hash computed before the ECS replacement but with | |
449 | the subnet extracted _after_ the replacement would not work. | |
450 | */ | |
451 | zeroScope = false; | |
452 | } | |
453 | // if zeroScope, pass the pre-ECS hash-key and do not pass the subnet to the cache | |
454 | dr.packetCache->insert(zeroScope ? dr.cacheKeyNoECS : dr.cacheKey, zeroScope ? boost::none : dr.subnet, dr.origFlags, dr.dnssecOK, *dr.qname, dr.qtype, dr.qclass, *response, *responseLen, dr.tcp, dr.dh->rcode, dr.tempFailureTTL); | |
455 | } | |
456 | ||
457 | #ifdef HAVE_DNSCRYPT | |
458 | if (!muted) { | |
459 | if (!encryptResponse(*response, responseLen, *responseSize, dr.tcp, dr.dnsCryptQuery, nullptr, nullptr)) { | |
460 | return false; | |
461 | } | |
462 | } | |
7129b5c4 | 463 | #endif /* HAVE_DNSCRYPT */ |
3e425868 RG |
464 | |
465 | return true; | |
466 | } | |
467 | ||
468 | static bool sendUDPResponse(int origFD, const char* response, const uint16_t responseLen, const int delayMsec, const ComboAddress& origDest, const ComboAddress& origRemote) | |
0f72fd5c | 469 | { |
fcffc585 RG |
470 | if(delayMsec && g_delay) { |
471 | DelayedPacket dp{origFD, string(response,responseLen), origRemote, origDest}; | |
472 | g_delay->submit(dp, delayMsec); | |
473 | } | |
474 | else { | |
20b019fa RG |
475 | ssize_t res; |
476 | if(origDest.sin4.sin_family == 0) { | |
3e425868 | 477 | res = sendto(origFD, response, responseLen, 0, reinterpret_cast<const struct sockaddr*>(&origRemote), origRemote.getSocklen()); |
20b019fa RG |
478 | } |
479 | else { | |
480 | res = sendfromto(origFD, response, responseLen, 0, origDest, origRemote); | |
481 | } | |
482 | if (res == -1) { | |
483 | int err = errno; | |
a702a96c | 484 | vinfolog("Error sending response to %s: %s", origRemote.toStringWithPort(), stringerror(err)); |
20b019fa | 485 | } |
fcffc585 RG |
486 | } |
487 | ||
488 | return true; | |
489 | } | |
490 | ||
150105a2 | 491 | |
fbf14b03 | 492 | int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state) |
150105a2 | 493 | { |
5bdbb83d | 494 | return state->sockets[state->socketsOffset++ % state->sockets.size()]; |
150105a2 RG |
495 | } |
496 | ||
5bdbb83d | 497 | static void pickBackendSocketsReadyForReceiving(const std::shared_ptr<DownstreamState>& state, std::vector<int>& ready) |
150105a2 | 498 | { |
5bdbb83d | 499 | ready.clear(); |
150105a2 | 500 | |
5bdbb83d RG |
501 | if (state->sockets.size() == 1) { |
502 | ready.push_back(state->sockets[0]); | |
503 | return ; | |
150105a2 RG |
504 | } |
505 | ||
5bdbb83d RG |
506 | { |
507 | std::lock_guard<std::mutex> lock(state->socketsLock); | |
508 | state->mplexer->getAvailableFDs(ready, -1); | |
150105a2 | 509 | } |
150105a2 RG |
510 | } |
511 | ||
4632045b | 512 | // listens on a dedicated socket, lobs answers from downstream servers to original requestors |
9b73b71c | 513 | void responderThread(std::shared_ptr<DownstreamState> dss) |
66d1c0bf | 514 | try { |
519f5484 | 515 | setThreadName("dnsdist/respond"); |
d8c19b98 | 516 | auto localRespRulactions = g_resprulactions.getLocal(); |
8179b6d6 | 517 | char packet[s_maxPacketCacheEntrySize + DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE]; |
b50ee135 | 518 | static_assert(sizeof(packet) <= UINT16_MAX, "Packet size should fit in a uint16_t"); |
3e425868 RG |
519 | /* when the answer is encrypted in place, we need to get a copy |
520 | of the original header before encryption to fill the ring buffer */ | |
521 | dnsheader cleartextDH; | |
ca404e94 | 522 | vector<uint8_t> rewrittenResponse; |
7267213a | 523 | |
66d1c0bf | 524 | uint16_t queryId = 0; |
5bdbb83d RG |
525 | std::vector<int> sockets; |
526 | sockets.reserve(dss->sockets.size()); | |
527 | ||
7730131a | 528 | for(;;) { |
57847d65 | 529 | dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet); |
66d1c0bf | 530 | try { |
5bdbb83d RG |
531 | pickBackendSocketsReadyForReceiving(dss, sockets); |
532 | for (const auto& fd : sockets) { | |
533 | ssize_t got = recv(fd, packet, sizeof(packet), 0); | |
534 | char * response = packet; | |
535 | size_t responseSize = sizeof(packet); | |
ca404e94 | 536 | |
2aa2c0aa | 537 | if (got < 0 || static_cast<size_t>(got) < sizeof(dnsheader)) |
5bdbb83d | 538 | continue; |
4f380af3 | 539 | |
3e425868 | 540 | uint16_t responseLen = static_cast<uint16_t>(got); |
5bdbb83d | 541 | queryId = dh->id; |
b50ee135 | 542 | |
fbf14b03 | 543 | if(queryId >= dss->idStates.size()) { |
5bdbb83d | 544 | continue; |
fbf14b03 | 545 | } |
4f380af3 | 546 | |
5bdbb83d | 547 | IDState* ids = &dss->idStates[queryId]; |
a9489723 | 548 | int64_t usageIndicator = ids->usageIndicator; |
c9ba8478 | 549 | |
311f19d5 | 550 | if(!IDState::isInUse(usageIndicator)) { |
9bd1a882 | 551 | /* the corresponding state is marked as not in use, meaning that: |
a9489723 | 552 | - it was already cleaned up by another thread and the state is gone ; |
9bd1a882 RG |
553 | - we already got a response for this query and this one is a duplicate. |
554 | Either way, we don't touch it. | |
555 | */ | |
5bdbb83d | 556 | continue; |
9bd1a882 | 557 | } |
7267213a | 558 | |
9bd1a882 | 559 | /* read the potential DOHUnit state as soon as possible, but don't use it |
a9489723 | 560 | until we have confirmed that we own this state by updating usageIndicator */ |
0956c5c5 | 561 | auto du = ids->du; |
5bdbb83d RG |
562 | /* setting age to 0 to prevent the maintainer thread from |
563 | cleaning this IDS while we process the response. | |
5bdbb83d RG |
564 | */ |
565 | ids->age = 0; | |
a9489723 | 566 | int origFD = ids->origFD; |
fcffc585 | 567 | |
e7c732b8 RG |
568 | unsigned int consumed = 0; |
569 | if (!responseContentMatches(response, responseLen, ids->qname, ids->qtype, ids->qclass, dss->remote, consumed)) { | |
5bdbb83d RG |
570 | continue; |
571 | } | |
7267213a | 572 | |
3c72bc54 | 573 | bool isDoH = du != nullptr; |
a9489723 RG |
574 | /* atomically mark the state as available, but only if it has not been altered |
575 | in the meantime */ | |
311f19d5 | 576 | if (ids->tryMarkUnused(usageIndicator)) { |
9bd1a882 RG |
577 | /* clear the potential DOHUnit asap, it's ours now |
578 | and since we just marked the state as unused, | |
579 | someone could overwrite it. */ | |
246ff31f | 580 | ids->du = nullptr; |
71b86bd8 RG |
581 | /* we only decrement the outstanding counter if the value was not |
582 | altered in the meantime, which would mean that the state has been actively reused | |
583 | and the other thread has not incremented the outstanding counter, so we don't | |
584 | want it to be decremented twice. */ | |
585 | --dss->outstanding; // you'd think an attacker could game this, but we're using connected socket | |
0956c5c5 | 586 | } else { |
9bd1a882 | 587 | /* someone updated the state in the meantime, we can't touch the existing pointer */ |
0956c5c5 | 588 | du = nullptr; |
f2d0c808 RG |
589 | /* since the state has been updated, we can't safely access it so let's just drop |
590 | this response */ | |
591 | continue; | |
71b86bd8 | 592 | } |
2c5db3f7 | 593 | |
5bdbb83d | 594 | if(dh->tc && g_truncateTC) { |
e0fd37ec | 595 | truncateTC(response, &responseLen, responseSize, consumed); |
5bdbb83d | 596 | } |
aeb36780 | 597 | |
5bdbb83d | 598 | dh->id = ids->origID; |
ca404e94 | 599 | |
5bdbb83d | 600 | uint16_t addRoom = 0; |
d0ae6360 RG |
601 | DNSResponse dr = makeDNSResponseFromIDState(*ids, dh, sizeof(packet), responseLen, false); |
602 | if (dr.dnsCryptQuery) { | |
5bdbb83d RG |
603 | addRoom = DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE; |
604 | } | |
ca404e94 | 605 | |
3e425868 RG |
606 | memcpy(&cleartextDH, dr.dh, sizeof(cleartextDH)); |
607 | if (!processResponse(&response, &responseLen, &responseSize, localRespRulactions, dr, addRoom, rewrittenResponse, ids->cs && ids->cs->muted)) { | |
608 | continue; | |
5bdbb83d | 609 | } |
886e2cf2 | 610 | |
5bdbb83d | 611 | if (ids->cs && !ids->cs->muted) { |
0956c5c5 | 612 | if (du) { |
fbf14b03 RG |
613 | #ifdef HAVE_DNS_OVER_HTTPS |
614 | // DoH query | |
246ff31f | 615 | du->response = std::string(response, responseLen); |
0956c5c5 | 616 | if (send(du->rsock, &du, sizeof(du), 0) != sizeof(du)) { |
9bd1a882 | 617 | /* at this point we have the only remaining pointer on this |
b6d19fca RG |
618 | DOHUnit object since we did set ids->du to nullptr earlier, |
619 | except if we got the response before the pointer could be | |
620 | released by the frontend */ | |
621 | du->release(); | |
fbf14b03 RG |
622 | } |
623 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
0956c5c5 | 624 | du = nullptr; |
fbf14b03 RG |
625 | } |
626 | else { | |
627 | ComboAddress empty; | |
628 | empty.sin4.sin_family = 0; | |
629 | /* if ids->destHarvested is false, origDest holds the listening address. | |
630 | We don't want to use that as a source since it could be 0.0.0.0 for example. */ | |
631 | sendUDPResponse(origFD, response, responseLen, dr.delayMsec, ids->destHarvested ? ids->origDest : empty, ids->origRemote); | |
632 | } | |
5bdbb83d | 633 | } |
66d1c0bf | 634 | |
cb167afd | 635 | ++g_stats.responses; |
83f7bbdb RG |
636 | if (ids->cs) { |
637 | ++ids->cs->responses; | |
638 | } | |
7fc95193 | 639 | ++dss->responses; |
66d1c0bf | 640 | |
5bdbb83d | 641 | double udiff = ids->sentTime.udiff(); |
fbf14b03 | 642 | vinfolog("Got answer from %s, relayed to %s%s, took %f usec", dss->remote.toStringWithPort(), ids->origRemote.toStringWithPort(), |
3c72bc54 | 643 | isDoH ? " (https)": "", udiff); |
66d1c0bf | 644 | |
01f6920b RG |
645 | struct timespec ts; |
646 | gettime(&ts); | |
3e425868 | 647 | g_rings.insertResponse(ts, *dr.remote, *dr.qname, dr.qtype, static_cast<unsigned int>(udiff), static_cast<unsigned int>(got), cleartextDH, dss->remote); |
66d1c0bf | 648 | |
5d7a33dd | 649 | switch (cleartextDH.rcode) { |
61d10a4d MH |
650 | case RCode::NXDomain: |
651 | ++g_stats.frontendNXDomain; | |
652 | break; | |
653 | case RCode::ServFail: | |
cb167afd | 654 | ++g_stats.servfailResponses; |
61d10a4d MH |
655 | ++g_stats.frontendServFail; |
656 | break; | |
657 | case RCode::NoError: | |
658 | ++g_stats.frontendNoError; | |
659 | break; | |
cb167afd | 660 | } |
5bdbb83d | 661 | dss->latencyUsec = (127.0 * dss->latencyUsec / 128.0) + udiff/128.0; |
66d1c0bf | 662 | |
5bdbb83d | 663 | doLatencyStats(udiff); |
fcadd56e | 664 | |
5bdbb83d RG |
665 | rewrittenResponse.clear(); |
666 | } | |
66d1c0bf | 667 | } |
df560083 | 668 | catch(const std::exception& e){ |
cd7fa253 | 669 | vinfolog("Got an error in UDP responder thread while parsing a response from %s, id %d: %s", dss->remote.toStringWithPort(), queryId, e.what()); |
66d1c0bf | 670 | } |
7730131a | 671 | } |
7730131a | 672 | } |
66d1c0bf RG |
673 | catch(const std::exception& e) |
674 | { | |
675 | errlog("UDP responder thread died because of exception: %s", e.what()); | |
66d1c0bf RG |
676 | } |
677 | catch(const PDNSException& e) | |
678 | { | |
679 | errlog("UDP responder thread died because of PowerDNS exception: %s", e.reason); | |
66d1c0bf RG |
680 | } |
681 | catch(...) | |
682 | { | |
683 | errlog("UDP responder thread died because of an exception: %s", "unknown"); | |
66d1c0bf | 684 | } |
24d5cb00 | 685 | |
5d7e6765 | 686 | bool DownstreamState::reconnect() |
3c115e0f | 687 | { |
5d7e6765 RG |
688 | std::unique_lock<std::mutex> tl(connectLock, std::try_to_lock); |
689 | if (!tl.owns_lock()) { | |
690 | /* we are already reconnecting */ | |
691 | return false; | |
692 | } | |
693 | ||
b58f08e5 | 694 | connected = false; |
5bdbb83d | 695 | for (auto& fd : sockets) { |
150105a2 | 696 | if (fd != -1) { |
5d7e6765 | 697 | if (sockets.size() > 1) { |
5bdbb83d RG |
698 | std::lock_guard<std::mutex> lock(socketsLock); |
699 | mplexer->removeReadFD(fd); | |
700 | } | |
150105a2 RG |
701 | /* shutdown() is needed to wake up recv() in the responderThread */ |
702 | shutdown(fd, SHUT_RDWR); | |
703 | close(fd); | |
704 | fd = -1; | |
f99e3aaf | 705 | } |
150105a2 RG |
706 | if (!IsAnyAddress(remote)) { |
707 | fd = SSocket(remote.sin4.sin_family, SOCK_DGRAM, 0); | |
708 | if (!IsAnyAddress(sourceAddr)) { | |
709 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); | |
70b0d0e2 RG |
710 | if (!sourceItfName.empty()) { |
711 | #ifdef SO_BINDTODEVICE | |
712 | int res = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, sourceItfName.c_str(), sourceItfName.length()); | |
713 | if (res != 0) { | |
714 | infolog("Error setting up the interface on backend socket '%s': %s", remote.toStringWithPort(), stringerror()); | |
715 | } | |
716 | #endif | |
717 | } | |
718 | ||
150105a2 RG |
719 | SBind(fd, sourceAddr); |
720 | } | |
721 | try { | |
722 | SConnect(fd, remote); | |
5d7e6765 | 723 | if (sockets.size() > 1) { |
5bdbb83d RG |
724 | std::lock_guard<std::mutex> lock(socketsLock); |
725 | mplexer->addReadFD(fd, [](int, boost::any) {}); | |
726 | } | |
150105a2 RG |
727 | connected = true; |
728 | } | |
729 | catch(const std::runtime_error& error) { | |
730 | infolog("Error connecting to new server with address %s: %s", remote.toStringWithPort(), error.what()); | |
38069e7e RG |
731 | connected = false; |
732 | break; | |
733 | } | |
7565f4e6 | 734 | } |
38069e7e RG |
735 | } |
736 | ||
737 | /* if at least one (re-)connection failed, close all sockets */ | |
738 | if (!connected) { | |
5bdbb83d | 739 | for (auto& fd : sockets) { |
38069e7e | 740 | if (fd != -1) { |
5d7e6765 RG |
741 | if (sockets.size() > 1) { |
742 | std::lock_guard<std::mutex> lock(socketsLock); | |
743 | mplexer->removeReadFD(fd); | |
744 | } | |
38069e7e RG |
745 | /* shutdown() is needed to wake up recv() in the responderThread */ |
746 | shutdown(fd, SHUT_RDWR); | |
747 | close(fd); | |
748 | fd = -1; | |
150105a2 | 749 | } |
7565f4e6 | 750 | } |
b58f08e5 | 751 | } |
5d7e6765 RG |
752 | |
753 | return connected; | |
b58f08e5 | 754 | } |
f2caf657 CHB |
755 | void DownstreamState::hash() |
756 | { | |
d58e616a | 757 | vinfolog("Computing hashes for id=%s and weight=%d", id, weight); |
f2caf657 | 758 | auto w = weight; |
d58e616a | 759 | WriteLock wl(&d_lock); |
f2caf657 CHB |
760 | hashes.clear(); |
761 | while (w > 0) { | |
762 | std::string uuid = boost::str(boost::format("%s-%d") % id % w); | |
763 | unsigned int wshash = burtleCI((const unsigned char*)uuid.c_str(), uuid.size(), g_hashperturb); | |
764 | hashes.insert(wshash); | |
765 | --w; | |
766 | } | |
767 | } | |
768 | ||
769 | void DownstreamState::setId(const boost::uuids::uuid& newId) | |
770 | { | |
771 | id = newId; | |
d58e616a CHB |
772 | // compute hashes only if already done |
773 | if (!hashes.empty()) { | |
774 | hash(); | |
775 | } | |
f2caf657 CHB |
776 | } |
777 | ||
778 | void DownstreamState::setWeight(int newWeight) | |
779 | { | |
5a2bbe8c CHB |
780 | if (newWeight < 1) { |
781 | errlog("Error setting server's weight: downstream weight value must be greater than 0."); | |
782 | return ; | |
783 | } | |
f2caf657 | 784 | weight = newWeight; |
d58e616a CHB |
785 | if (!hashes.empty()) { |
786 | hash(); | |
787 | } | |
f2caf657 | 788 | } |
b58f08e5 | 789 | |
203b5348 | 790 | DownstreamState::DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf_, const std::string& sourceItfName_, size_t numberOfSockets, bool connect=true): sourceItfName(sourceItfName_), remote(remote_), sourceAddr(sourceAddr_), sourceItf(sourceItf_) |
b58f08e5 | 791 | { |
d58e616a | 792 | pthread_rwlock_init(&d_lock, nullptr); |
d61aa945 | 793 | id = getUniqueID(); |
5d7e6765 RG |
794 | threadStarted.clear(); |
795 | ||
5bdbb83d RG |
796 | mplexer = std::unique_ptr<FDMultiplexer>(FDMultiplexer::getMultiplexerSilent()); |
797 | ||
798 | sockets.resize(numberOfSockets); | |
799 | for (auto& fd : sockets) { | |
150105a2 RG |
800 | fd = -1; |
801 | } | |
802 | ||
203b5348 | 803 | if (connect && !IsAnyAddress(remote)) { |
b58f08e5 | 804 | reconnect(); |
f99e3aaf RG |
805 | idStates.resize(g_maxOutstanding); |
806 | sw.start(); | |
807 | infolog("Added downstream server %s", remote.toStringWithPort()); | |
fbe2a2e0 | 808 | } |
1720247e | 809 | |
3c115e0f | 810 | } |
811 | ||
773470ca | 812 | std::mutex g_luamutex; |
3f25bce6 | 813 | LuaContext g_lua; |
3f25bce6 | 814 | |
ecbe9133 | 815 | GlobalStateHolder<ServerPolicy> g_policy; |
773470ca | 816 | |
497a6e3a | 817 | shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq) |
773470ca | 818 | { |
22b2b326 | 819 | for(auto& d : servers) { |
da4e7813 | 820 | if(d.second->isUp() && d.second->qps.check()) |
821 | return d.second; | |
773470ca | 822 | } |
497a6e3a | 823 | return leastOutstanding(servers, dq); |
2c5db3f7 | 824 | } |
825 | ||
d1fba58e | 826 | // get server with least outstanding queries, and within those, with the lowest order, and within those: the fastest |
497a6e3a | 827 | shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq) |
c9262563 | 828 | { |
886e2cf2 RG |
829 | if (servers.size() == 1 && servers[0].second->isUp()) { |
830 | return servers[0].second; | |
831 | } | |
832 | ||
d1fba58e | 833 | vector<pair<tuple<int,int,double>, shared_ptr<DownstreamState>>> poss; |
834 | /* so you might wonder, why do we go through this trouble? The data on which we sort could change during the sort, | |
835 | which would suck royally and could even lead to crashes. So first we snapshot on what we sort, and then we sort */ | |
478ce172 | 836 | poss.reserve(servers.size()); |
0e41337b | 837 | for(auto& d : servers) { |
da4e7813 | 838 | if(d.second->isUp()) { |
d1fba58e | 839 | poss.push_back({make_tuple(d.second->outstanding.load(), d.second->order, d.second->latencyUsec), d.second}); |
c9262563 | 840 | } |
841 | } | |
842 | if(poss.empty()) | |
843 | return shared_ptr<DownstreamState>(); | |
844 | nth_element(poss.begin(), poss.begin(), poss.end(), [](const decltype(poss)::value_type& a, const decltype(poss)::value_type& b) { return a.first < b.first; }); | |
845 | return poss.begin()->second; | |
846 | } | |
847 | ||
497a6e3a | 848 | shared_ptr<DownstreamState> valrandom(unsigned int val, const NumberedServerVector& servers, const DNSQuestion* dq) |
c9262563 | 849 | { |
850 | vector<pair<int, shared_ptr<DownstreamState>>> poss; | |
278403d3 DM |
851 | int sum = 0; |
852 | int max = std::numeric_limits<int>::max(); | |
853 | ||
22b2b326 | 854 | for(auto& d : servers) { // w=1, w=10 -> 1, 11 |
da4e7813 | 855 | if(d.second->isUp()) { |
278403d3 DM |
856 | // Don't overflow sum when adding high weights |
857 | if(d.second->weight > max - sum) { | |
858 | sum = max; | |
859 | } else { | |
860 | sum += d.second->weight; | |
861 | } | |
862 | ||
da4e7813 | 863 | poss.push_back({sum, d.second}); |
c9262563 | 864 | } |
865 | } | |
d2894425 NJ |
866 | |
867 | // Catch poss & sum are empty to avoid SIGFPE | |
868 | if(poss.empty()) | |
869 | return shared_ptr<DownstreamState>(); | |
870 | ||
a7f3108c | 871 | int r = val % sum; |
af619119 | 872 | auto p = upper_bound(poss.begin(), poss.end(),r, [](int r_, const decltype(poss)::value_type& a) { return r_ < a.first;}); |
c9262563 | 873 | if(p==poss.end()) |
874 | return shared_ptr<DownstreamState>(); | |
875 | return p->second; | |
876 | } | |
877 | ||
497a6e3a | 878 | shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq) |
a7f3108c | 879 | { |
497a6e3a | 880 | return valrandom(random(), servers, dq); |
a7f3108c | 881 | } |
882 | ||
36e763fa | 883 | uint32_t g_hashperturb; |
2b4287d4 | 884 | double g_consistentHashBalancingFactor = 0; |
497a6e3a | 885 | shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq) |
a7f3108c | 886 | { |
497a6e3a | 887 | return valrandom(dq->qname->hash(g_hashperturb), servers, dq); |
a7f3108c | 888 | } |
889 | ||
1720247e CHB |
890 | shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq) |
891 | { | |
1720247e | 892 | unsigned int qhash = dq->qname->hash(g_hashperturb); |
b712c51e CHB |
893 | unsigned int sel = std::numeric_limits<unsigned int>::max(); |
894 | unsigned int min = std::numeric_limits<unsigned int>::max(); | |
895 | shared_ptr<DownstreamState> ret = nullptr, first = nullptr; | |
1720247e | 896 | |
2b4287d4 | 897 | double targetLoad = std::numeric_limits<double>::max(); |
80ee5144 | 898 | if (g_consistentHashBalancingFactor > 0) { |
2b4287d4 RG |
899 | /* we start with one, representing the query we are currently handling */ |
900 | double currentLoad = 1; | |
901 | for (const auto& pair : servers) { | |
902 | currentLoad += pair.second->outstanding; | |
903 | } | |
904 | targetLoad = (currentLoad / servers.size()) * g_consistentHashBalancingFactor; | |
905 | } | |
906 | ||
1720247e | 907 | for (const auto& d: servers) { |
2b4287d4 | 908 | if (d.second->isUp() && d.second->outstanding <= targetLoad) { |
93ca495f | 909 | // make sure hashes have been computed |
d58e616a CHB |
910 | if (d.second->hashes.empty()) { |
911 | d.second->hash(); | |
912 | } | |
913 | { | |
914 | ReadLock rl(&(d.second->d_lock)); | |
93ca495f CHB |
915 | const auto& server = d.second; |
916 | // we want to keep track of the last hash | |
b712c51e CHB |
917 | if (min > *(server->hashes.begin())) { |
918 | min = *(server->hashes.begin()); | |
919 | first = server; | |
93ca495f | 920 | } |
b712c51e CHB |
921 | |
922 | auto hash_it = server->hashes.lower_bound(qhash); | |
923 | if (hash_it != server->hashes.end()) { | |
924 | if (*hash_it < sel) { | |
93ca495f CHB |
925 | sel = *hash_it; |
926 | ret = server; | |
927 | } | |
d58e616a | 928 | } |
1720247e CHB |
929 | } |
930 | } | |
931 | } | |
93ca495f CHB |
932 | if (ret != nullptr) { |
933 | return ret; | |
1720247e | 934 | } |
b712c51e CHB |
935 | if (first != nullptr) { |
936 | return first; | |
1720247e | 937 | } |
93ca495f | 938 | return shared_ptr<DownstreamState>(); |
1720247e | 939 | } |
a7f3108c | 940 | |
497a6e3a | 941 | shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq) |
5f504638 | 942 | { |
da4e7813 | 943 | NumberedServerVector poss; |
c9262563 | 944 | |
22b2b326 | 945 | for(auto& d : servers) { |
da4e7813 | 946 | if(d.second->isUp()) { |
5f504638 | 947 | poss.push_back(d); |
c9262563 | 948 | } |
5f504638 | 949 | } |
c9262563 | 950 | |
ecbe9133 | 951 | const auto *res=&poss; |
32b86928 | 952 | if(poss.empty() && !g_roundrobinFailOnNoServer) |
ecbe9133 | 953 | res = &servers; |
c9262563 | 954 | |
955 | if(res->empty()) | |
956 | return shared_ptr<DownstreamState>(); | |
957 | ||
958 | static unsigned int counter; | |
959 | ||
da4e7813 | 960 | return (*res)[(counter++) % res->size()].second; |
5f504638 | 961 | } |
962 | ||
e6841c9e | 963 | ComboAddress g_serverControl{"127.0.0.1:5199"}; |
b076b34a | 964 | |
886e2cf2 RG |
965 | std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName) |
966 | { | |
967 | std::shared_ptr<ServerPool> pool; | |
968 | pools_t::iterator it = pools.find(poolName); | |
969 | if (it != pools.end()) { | |
970 | pool = it->second; | |
971 | } | |
972 | else { | |
8f4f5186 RG |
973 | if (!poolName.empty()) |
974 | vinfolog("Creating pool %s", poolName); | |
886e2cf2 RG |
975 | pool = std::make_shared<ServerPool>(); |
976 | pools.insert(std::pair<std::string,std::shared_ptr<ServerPool> >(poolName, pool)); | |
977 | } | |
978 | return pool; | |
979 | } | |
78c8047b | 980 | |
742c079a RG |
981 | void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy) |
982 | { | |
983 | std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName); | |
984 | if (!poolName.empty()) { | |
985 | vinfolog("Setting pool %s server selection policy to %s", poolName, policy->name); | |
986 | } else { | |
987 | vinfolog("Setting default pool server selection policy to %s", policy->name); | |
988 | } | |
989 | pool->policy = policy; | |
990 | } | |
991 | ||
886e2cf2 | 992 | void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server) |
22b2b326 | 993 | { |
886e2cf2 | 994 | std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName); |
c218b223 | 995 | if (!poolName.empty()) { |
8f4f5186 | 996 | vinfolog("Adding server to pool %s", poolName); |
c218b223 | 997 | } else { |
8f4f5186 | 998 | vinfolog("Adding server to default pool"); |
c218b223 | 999 | } |
a1b1a29d | 1000 | pool->addServer(server); |
886e2cf2 RG |
1001 | } |
1002 | ||
1003 | void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server) | |
1004 | { | |
8f4f5186 | 1005 | std::shared_ptr<ServerPool> pool = getPool(pools, poolName); |
886e2cf2 | 1006 | |
c218b223 | 1007 | if (!poolName.empty()) { |
8f4f5186 | 1008 | vinfolog("Removing server from pool %s", poolName); |
c218b223 | 1009 | } |
1010 | else { | |
8f4f5186 | 1011 | vinfolog("Removing server from default pool"); |
c218b223 | 1012 | } |
886e2cf2 | 1013 | |
a1b1a29d | 1014 | pool->removeServer(server); |
886e2cf2 RG |
1015 | } |
1016 | ||
1017 | std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName) | |
1018 | { | |
1019 | pools_t::const_iterator it = pools.find(poolName); | |
1020 | ||
1021 | if (it == pools.end()) { | |
1022 | throw std::out_of_range("No pool named " + poolName); | |
1023 | } | |
1024 | ||
1025 | return it->second; | |
1026 | } | |
1027 | ||
a1b1a29d | 1028 | NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName) |
886e2cf2 RG |
1029 | { |
1030 | std::shared_ptr<ServerPool> pool = getPool(pools, poolName); | |
a1b1a29d | 1031 | return pool->getServers(); |
22b2b326 | 1032 | } |
c9262563 | 1033 | |
614239d0 | 1034 | static void spoofResponseFromString(DNSQuestion& dq, const string& spoofContent) |
7791f83a RG |
1035 | { |
1036 | string result; | |
614239d0 RG |
1037 | |
1038 | std::vector<std::string> addrs; | |
1039 | stringtok(addrs, spoofContent, " ,"); | |
1040 | ||
1041 | if (addrs.size() == 1) { | |
1042 | try { | |
1043 | ComboAddress spoofAddr(spoofContent); | |
1044 | SpoofAction sa({spoofAddr}); | |
1045 | sa(&dq, &result); | |
1046 | } | |
1047 | catch(const PDNSException &e) { | |
1048 | SpoofAction sa(spoofContent); // CNAME then | |
1049 | sa(&dq, &result); | |
1050 | } | |
1051 | } else { | |
1052 | std::vector<ComboAddress> cas; | |
1053 | for (const auto& addr : addrs) { | |
1054 | try { | |
1055 | cas.push_back(ComboAddress(addr)); | |
1056 | } | |
1057 | catch (...) { | |
1058 | } | |
1059 | } | |
1060 | SpoofAction sa(cas); | |
6ca7a40a | 1061 | sa(&dq, &result); |
7791f83a RG |
1062 | } |
1063 | } | |
1064 | ||
2a28db86 RG |
1065 | bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop) |
1066 | { | |
1067 | switch(action) { | |
1068 | case DNSAction::Action::Allow: | |
1069 | return true; | |
1070 | break; | |
1071 | case DNSAction::Action::Drop: | |
1072 | ++g_stats.ruleDrop; | |
1073 | drop = true; | |
1074 | return true; | |
1075 | break; | |
1076 | case DNSAction::Action::Nxdomain: | |
1077 | dq.dh->rcode = RCode::NXDomain; | |
1078 | dq.dh->qr=true; | |
1079 | ++g_stats.ruleNXDomain; | |
1080 | return true; | |
1081 | break; | |
1082 | case DNSAction::Action::Refused: | |
1083 | dq.dh->rcode = RCode::Refused; | |
1084 | dq.dh->qr=true; | |
1085 | ++g_stats.ruleRefused; | |
1086 | return true; | |
1087 | break; | |
1088 | case DNSAction::Action::ServFail: | |
1089 | dq.dh->rcode = RCode::ServFail; | |
1090 | dq.dh->qr=true; | |
1091 | ++g_stats.ruleServFail; | |
1092 | return true; | |
1093 | break; | |
1094 | case DNSAction::Action::Spoof: | |
1095 | spoofResponseFromString(dq, ruleresult); | |
1096 | return true; | |
1097 | break; | |
1098 | case DNSAction::Action::Truncate: | |
1099 | dq.dh->tc = true; | |
1100 | dq.dh->qr = true; | |
955b9377 RG |
1101 | dq.dh->ra = dq.dh->rd; |
1102 | dq.dh->aa = false; | |
1103 | dq.dh->ad = false; | |
2a28db86 RG |
1104 | return true; |
1105 | break; | |
1106 | case DNSAction::Action::HeaderModify: | |
1107 | return true; | |
1108 | break; | |
1109 | case DNSAction::Action::Pool: | |
1110 | dq.poolname=ruleresult; | |
1111 | return true; | |
1112 | break; | |
1113 | case DNSAction::Action::NoRecurse: | |
1114 | dq.dh->rd = false; | |
1115 | return true; | |
1116 | break; | |
1117 | /* non-terminal actions follow */ | |
1118 | case DNSAction::Action::Delay: | |
1119 | dq.delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry | |
1120 | break; | |
1121 | case DNSAction::Action::None: | |
1122 | /* fall-through */ | |
1123 | case DNSAction::Action::NoOp: | |
1124 | break; | |
1125 | } | |
1126 | ||
1127 | /* false means that we don't stop the processing */ | |
1128 | return false; | |
1129 | } | |
1130 | ||
1131 | ||
1132 | static bool applyRulesToQuery(LocalHolders& holders, DNSQuestion& dq, const struct timespec& now) | |
e91084ce | 1133 | { |
4ab01344 | 1134 | g_rings.insertQuery(now, *dq.remote, *dq.qname, dq.qtype, dq.len, *dq.dh); |
e91084ce | 1135 | |
786e4d8c | 1136 | if(g_qcount.enabled) { |
348ef1c6 | 1137 | string qname = (*dq.qname).toLogString(); |
786e4d8c RS |
1138 | bool countQuery{true}; |
1139 | if(g_qcount.filter) { | |
1140 | std::lock_guard<std::mutex> lock(g_luamutex); | |
dd1a3034 | 1141 | std::tie (countQuery, qname) = g_qcount.filter(&dq); |
786e4d8c RS |
1142 | } |
1143 | ||
1144 | if(countQuery) { | |
1145 | WriteLock wl(&g_qcount.queryLock); | |
1146 | if(!g_qcount.records.count(qname)) { | |
1147 | g_qcount.records[qname] = 0; | |
1148 | } | |
1149 | g_qcount.records[qname]++; | |
1150 | } | |
1151 | } | |
1152 | ||
0beaa5c8 | 1153 | if(auto got = holders.dynNMGBlock->lookup(*dq.remote)) { |
701f690b | 1154 | auto updateBlockStats = [&got]() { |
cb167afd | 1155 | ++g_stats.dynBlocked; |
701f690b | 1156 | got->second.blocks++; |
1157 | }; | |
1158 | ||
e91084ce | 1159 | if(now < got->second.until) { |
7b925432 RG |
1160 | DNSAction::Action action = got->second.action; |
1161 | if (action == DNSAction::Action::None) { | |
1162 | action = g_dynBlockAction; | |
1163 | } | |
477c86a0 RG |
1164 | switch (action) { |
1165 | case DNSAction::Action::NoOp: | |
1166 | /* do nothing */ | |
1167 | break; | |
79ee8ff9 RG |
1168 | |
1169 | case DNSAction::Action::Nxdomain: | |
1170 | vinfolog("Query from %s turned into NXDomain because of dynamic block", dq.remote->toStringWithPort()); | |
1171 | updateBlockStats(); | |
1172 | ||
1173 | dq.dh->rcode = RCode::NXDomain; | |
1174 | dq.dh->qr=true; | |
1175 | return true; | |
1176 | ||
477c86a0 | 1177 | case DNSAction::Action::Refused: |
dd46e5e3 | 1178 | vinfolog("Query from %s refused because of dynamic block", dq.remote->toStringWithPort()); |
701f690b | 1179 | updateBlockStats(); |
8477236d | 1180 | |
dd46e5e3 | 1181 | dq.dh->rcode = RCode::Refused; |
79ee8ff9 | 1182 | dq.dh->qr = true; |
dd46e5e3 | 1183 | return true; |
477c86a0 RG |
1184 | |
1185 | case DNSAction::Action::Truncate: | |
8477236d | 1186 | if(!dq.tcp) { |
701f690b | 1187 | updateBlockStats(); |
8477236d | 1188 | vinfolog("Query from %s truncated because of dynamic block", dq.remote->toStringWithPort()); |
1189 | dq.dh->tc = true; | |
1190 | dq.dh->qr = true; | |
955b9377 RG |
1191 | dq.dh->ra = dq.dh->rd; |
1192 | dq.dh->aa = false; | |
1193 | dq.dh->ad = false; | |
8477236d | 1194 | return true; |
1195 | } | |
1196 | else { | |
348ef1c6 | 1197 | vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1198 | } |
477c86a0 | 1199 | break; |
3d60b39a | 1200 | case DNSAction::Action::NoRecurse: |
1201 | updateBlockStats(); | |
1202 | vinfolog("Query from %s setting rd=0 because of dynamic block", dq.remote->toStringWithPort()); | |
1203 | dq.dh->rd = false; | |
1204 | return true; | |
477c86a0 | 1205 | default: |
701f690b | 1206 | updateBlockStats(); |
dd46e5e3 RG |
1207 | vinfolog("Query from %s dropped because of dynamic block", dq.remote->toStringWithPort()); |
1208 | return false; | |
1209 | } | |
e91084ce RG |
1210 | } |
1211 | } | |
1212 | ||
0beaa5c8 | 1213 | if(auto got = holders.dynSMTBlock->lookup(*dq.qname)) { |
701f690b | 1214 | auto updateBlockStats = [&got]() { |
cb167afd | 1215 | ++g_stats.dynBlocked; |
701f690b | 1216 | got->blocks++; |
1217 | }; | |
1218 | ||
71c94675 | 1219 | if(now < got->until) { |
7b925432 RG |
1220 | DNSAction::Action action = got->action; |
1221 | if (action == DNSAction::Action::None) { | |
1222 | action = g_dynBlockAction; | |
1223 | } | |
477c86a0 RG |
1224 | switch (action) { |
1225 | case DNSAction::Action::NoOp: | |
1226 | /* do nothing */ | |
1227 | break; | |
79ee8ff9 | 1228 | case DNSAction::Action::Nxdomain: |
348ef1c6 | 1229 | vinfolog("Query from %s for %s turned into NXDomain because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
79ee8ff9 RG |
1230 | updateBlockStats(); |
1231 | ||
1232 | dq.dh->rcode = RCode::NXDomain; | |
1233 | dq.dh->qr=true; | |
1234 | return true; | |
477c86a0 | 1235 | case DNSAction::Action::Refused: |
348ef1c6 | 1236 | vinfolog("Query from %s for %s refused because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
701f690b | 1237 | updateBlockStats(); |
8477236d | 1238 | |
dd46e5e3 RG |
1239 | dq.dh->rcode = RCode::Refused; |
1240 | dq.dh->qr=true; | |
1241 | return true; | |
477c86a0 | 1242 | case DNSAction::Action::Truncate: |
8477236d | 1243 | if(!dq.tcp) { |
701f690b | 1244 | updateBlockStats(); |
8477236d | 1245 | |
348ef1c6 | 1246 | vinfolog("Query from %s for %s truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1247 | dq.dh->tc = true; |
1248 | dq.dh->qr = true; | |
955b9377 RG |
1249 | dq.dh->ra = dq.dh->rd; |
1250 | dq.dh->aa = false; | |
1251 | dq.dh->ad = false; | |
8477236d | 1252 | return true; |
1253 | } | |
1254 | else { | |
348ef1c6 | 1255 | vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1256 | } |
477c86a0 | 1257 | break; |
3d60b39a | 1258 | case DNSAction::Action::NoRecurse: |
1259 | updateBlockStats(); | |
1260 | vinfolog("Query from %s setting rd=0 because of dynamic block", dq.remote->toStringWithPort()); | |
1261 | dq.dh->rd = false; | |
1262 | return true; | |
477c86a0 | 1263 | default: |
701f690b | 1264 | updateBlockStats(); |
348ef1c6 | 1265 | vinfolog("Query from %s for %s dropped because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
dd46e5e3 RG |
1266 | return false; |
1267 | } | |
71c94675 | 1268 | } |
1269 | } | |
1270 | ||
e91084ce RG |
1271 | DNSAction::Action action=DNSAction::Action::None; |
1272 | string ruleresult; | |
2a28db86 | 1273 | bool drop = false; |
0beaa5c8 | 1274 | for(const auto& lr : *holders.rulactions) { |
4d5959e6 RG |
1275 | if(lr.d_rule->matches(&dq)) { |
1276 | lr.d_rule->d_matches++; | |
1277 | action=(*lr.d_action)(&dq, &ruleresult); | |
2a28db86 | 1278 | if (processRulesResult(action, dq, ruleresult, drop)) { |
3d60b39a | 1279 | break; |
e91084ce RG |
1280 | } |
1281 | } | |
1282 | } | |
1283 | ||
2a28db86 RG |
1284 | if (drop) { |
1285 | return false; | |
1286 | } | |
1287 | ||
e91084ce RG |
1288 | return true; |
1289 | } | |
1290 | ||
fbf14b03 | 1291 | ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck) |
fbe2a2e0 | 1292 | { |
b58f08e5 RG |
1293 | ssize_t result; |
1294 | ||
fbe2a2e0 | 1295 | if (ss->sourceItf == 0) { |
b58f08e5 RG |
1296 | result = send(sd, request, requestLen, 0); |
1297 | } | |
1298 | else { | |
1299 | struct msghdr msgh; | |
1300 | struct iovec iov; | |
7bec330a | 1301 | cmsgbuf_aligned cbuf; |
a2353842 | 1302 | ComboAddress remote(ss->remote); |
7bec330a OM |
1303 | fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), const_cast<char*>(request), requestLen, &remote); |
1304 | addCMsgSrcAddr(&msgh, &cbuf, &ss->sourceAddr, ss->sourceItf); | |
b58f08e5 | 1305 | result = sendmsg(sd, &msgh, 0); |
fbe2a2e0 RG |
1306 | } |
1307 | ||
b58f08e5 RG |
1308 | if (result == -1) { |
1309 | int savederrno = errno; | |
1310 | vinfolog("Error sending request to backend %s: %d", ss->remote.toStringWithPort(), savederrno); | |
1311 | ||
1312 | /* This might sound silly, but on Linux send() might fail with EINVAL | |
1b126225 RG |
1313 | if the interface the socket was bound to doesn't exist anymore. |
1314 | We don't want to reconnect the real socket if the healthcheck failed, | |
1315 | because it's not using the same socket. | |
1316 | */ | |
1317 | if (!healthCheck && (savederrno == EINVAL || savederrno == ENODEV)) { | |
b58f08e5 RG |
1318 | ss->reconnect(); |
1319 | } | |
fbe2a2e0 RG |
1320 | } |
1321 | ||
b58f08e5 | 1322 | return result; |
fbe2a2e0 RG |
1323 | } |
1324 | ||
0beaa5c8 | 1325 | static bool isUDPQueryAcceptable(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest) |
24d5cb00 | 1326 | { |
0beaa5c8 RG |
1327 | if (msgh->msg_flags & MSG_TRUNC) { |
1328 | /* message was too large for our buffer */ | |
1329 | vinfolog("Dropping message too large for our buffer"); | |
cb167afd | 1330 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1331 | return false; |
1332 | } | |
a4652d55 | 1333 | |
0beaa5c8 RG |
1334 | if(!holders.acl->match(remote)) { |
1335 | vinfolog("Query from %s dropped because of ACL", remote.toStringWithPort()); | |
cb167afd | 1336 | ++g_stats.aclDrops; |
0beaa5c8 | 1337 | return false; |
2b3eefc3 | 1338 | } |
2b3eefc3 | 1339 | |
0beaa5c8 | 1340 | cs.queries++; |
cb167afd | 1341 | ++g_stats.queries; |
2b3eefc3 | 1342 | |
0beaa5c8 RG |
1343 | if (HarvestDestinationAddress(msgh, &dest)) { |
1344 | /* we don't get the port, only the address */ | |
1345 | dest.sin4.sin_port = cs.local.sin4.sin_port; | |
1346 | } | |
1347 | else { | |
1348 | dest.sin4.sin_family = 0; | |
2b3eefc3 | 1349 | } |
549d63c9 | 1350 | |
0beaa5c8 RG |
1351 | return true; |
1352 | } | |
1353 | ||
4ab01344 | 1354 | boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp) |
0beaa5c8 RG |
1355 | { |
1356 | if (cs.dnscryptCtx) { | |
7129b5c4 | 1357 | #ifdef HAVE_DNSCRYPT |
0beaa5c8 RG |
1358 | vector<uint8_t> response; |
1359 | uint16_t decryptedQueryLen = 0; | |
2b3eefc3 | 1360 | |
43234e76 | 1361 | dnsCryptQuery = std::make_shared<DNSCryptQuery>(cs.dnscryptCtx); |
0beaa5c8 | 1362 | |
4ab01344 | 1363 | bool decrypted = handleDNSCryptQuery(const_cast<char*>(query), len, dnsCryptQuery, &decryptedQueryLen, tcp, now, response); |
0beaa5c8 RG |
1364 | |
1365 | if (!decrypted) { | |
1366 | if (response.size() > 0) { | |
4ab01344 | 1367 | return response; |
2b3eefc3 | 1368 | } |
4ab01344 | 1369 | throw std::runtime_error("Unable to decrypt DNSCrypt query, dropping."); |
0beaa5c8 | 1370 | } |
2b3eefc3 | 1371 | |
0beaa5c8 | 1372 | len = decryptedQueryLen; |
7129b5c4 | 1373 | #endif /* HAVE_DNSCRYPT */ |
0beaa5c8 | 1374 | } |
4ab01344 | 1375 | return boost::none; |
0beaa5c8 | 1376 | } |
2b3eefc3 | 1377 | |
0beaa5c8 RG |
1378 | bool checkQueryHeaders(const struct dnsheader* dh) |
1379 | { | |
1380 | if (dh->qr) { // don't respond to responses | |
cb167afd | 1381 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1382 | return false; |
1383 | } | |
2b3eefc3 | 1384 | |
0beaa5c8 | 1385 | if (dh->qdcount == 0) { |
cb167afd | 1386 | ++g_stats.emptyQueries; |
0beaa5c8 RG |
1387 | return false; |
1388 | } | |
0ba5eecf | 1389 | |
0beaa5c8 | 1390 | if (dh->rd) { |
cb167afd | 1391 | ++g_stats.rdQueries; |
0beaa5c8 | 1392 | } |
e91084ce | 1393 | |
0beaa5c8 RG |
1394 | return true; |
1395 | } | |
963bef8d | 1396 | |
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* Fill outMsg so that response (responseLen bytes) gets sent to remote.
   When dest has a family set, a control message is added so that the response
   is sent from that address; otherwise no control data is attached. */
static void queueResponse(const ClientState& cs, const char* response, uint16_t responseLen, const ComboAddress& dest, const ComboAddress& remote, struct mmsghdr& outMsg, struct iovec* iov, cmsgbuf_aligned* cbuf)
{
  outMsg.msg_len = 0;
  fillMSGHdr(&outMsg.msg_hdr, iov, nullptr, 0, const_cast<char*>(response), responseLen, const_cast<ComboAddress*>(&remote));

  const bool haveSourceAddress = (dest.sin4.sin_family != 0);
  if (haveSourceAddress) {
    addCMsgSrcAddr(&outMsg.msg_hdr, cbuf, &dest, 0);
  }
  else {
    outMsg.msg_hdr.msg_control = nullptr;
  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
43eeadc1 | 1411 | |
3e425868 RG |
1412 | /* self-generated responses or cache hits */ |
1413 | static bool prepareOutgoingResponse(LocalHolders& holders, ClientState& cs, DNSQuestion& dq, bool cacheHit) | |
54aaa82b | 1414 | { |
3e425868 | 1415 | DNSResponse dr(dq.qname, dq.qtype, dq.qclass, dq.consumed, dq.local, dq.remote, reinterpret_cast<dnsheader*>(dq.dh), dq.size, dq.len, dq.tcp, dq.queryTime); |
4ab01344 | 1416 | |
54aaa82b | 1417 | #ifdef HAVE_PROTOBUF |
1418 | dr.uniqueId = dq.uniqueId; | |
1419 | #endif | |
1420 | dr.qTag = dq.qTag; | |
3e425868 | 1421 | dr.delayMsec = dq.delayMsec; |
54aaa82b | 1422 | |
3e425868 RG |
1423 | if (!applyRulesToResponse(cacheHit ? holders.cacheHitRespRulactions : holders.selfAnsweredRespRulactions, dr)) { |
1424 | return false; | |
54aaa82b | 1425 | } |
1426 | ||
3e425868 RG |
1427 | /* in case a rule changed it */ |
1428 | dq.delayMsec = dr.delayMsec; | |
1429 | ||
54aaa82b | 1430 | #ifdef HAVE_DNSCRYPT |
7129b5c4 | 1431 | if (!cs.muted) { |
3e425868 RG |
1432 | if (!encryptResponse(reinterpret_cast<char*>(dq.dh), &dq.len, dq.size, dq.tcp, dq.dnsCryptQuery, nullptr, nullptr)) { |
1433 | return false; | |
54aaa82b | 1434 | } |
54aaa82b | 1435 | } |
7129b5c4 | 1436 | #endif /* HAVE_DNSCRYPT */ |
54aaa82b | 1437 | |
389d903a RG |
1438 | if (cacheHit) { |
1439 | ++g_stats.cacheHits; | |
1440 | } | |
3e425868 | 1441 | |
61d10a4d MH |
1442 | switch (dr.dh->rcode) { |
1443 | case RCode::NXDomain: | |
1444 | ++g_stats.frontendNXDomain; | |
1445 | break; | |
1446 | case RCode::ServFail: | |
1447 | ++g_stats.frontendServFail; | |
1448 | break; | |
1449 | case RCode::NoError: | |
1450 | ++g_stats.frontendNoError; | |
1451 | break; | |
1452 | } | |
3e425868 | 1453 | |
54aaa82b | 1454 | doLatencyStats(0); // we're not going to measure this |
3e425868 | 1455 | return true; |
54aaa82b | 1456 | } |
1457 | ||
3e425868 | 1458 | ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend) |
0beaa5c8 | 1459 | { |
4ab01344 | 1460 | const uint16_t queryId = ntohs(dq.dh->id); |
2b3eefc3 | 1461 | |
0beaa5c8 | 1462 | try { |
43234e76 RG |
1463 | /* we need an accurate ("real") value for the response and |
1464 | to store into the IDS, but not for insertion into the | |
1465 | rings for example */ | |
43234e76 RG |
1466 | struct timespec now; |
1467 | gettime(&now); | |
2efd427d | 1468 | |
2a28db86 | 1469 | if (!applyRulesToQuery(holders, dq, now)) { |
3e425868 | 1470 | return ProcessQueryResult::Drop; |
0beaa5c8 | 1471 | } |
b63add03 | 1472 | |
0beaa5c8 | 1473 | if(dq.dh->qr) { // something turned it into a response |
4ab01344 | 1474 | fixUpQueryTurnedResponse(dq, dq.origFlags); |
0beaa5c8 | 1475 | |
3e425868 RG |
1476 | if (!prepareOutgoingResponse(holders, cs, dq, false)) { |
1477 | return ProcessQueryResult::Drop; | |
22b2b326 | 1478 | } |
5f504638 | 1479 | |
3e425868 | 1480 | ++g_stats.selfAnswered; |
7fc95193 | 1481 | ++cs.responses; |
3e425868 | 1482 | return ProcessQueryResult::SendAnswer; |
0beaa5c8 RG |
1483 | } |
1484 | ||
2a28db86 | 1485 | std::shared_ptr<ServerPool> serverPool = getPool(*holders.pools, dq.poolname); |
4ab01344 | 1486 | dq.packetCache = serverPool->packetCache; |
a1b1a29d | 1487 | auto policy = *(holders.policy); |
0beaa5c8 | 1488 | if (serverPool->policy != nullptr) { |
a1b1a29d | 1489 | policy = *(serverPool->policy); |
0beaa5c8 | 1490 | } |
a1b1a29d RG |
1491 | auto servers = serverPool->getServers(); |
1492 | if (policy.isLua) { | |
0beaa5c8 | 1493 | std::lock_guard<std::mutex> lock(g_luamutex); |
3e425868 | 1494 | selectedBackend = policy.policy(servers, &dq); |
a1b1a29d RG |
1495 | } |
1496 | else { | |
3e425868 | 1497 | selectedBackend = policy.policy(servers, &dq); |
0beaa5c8 | 1498 | } |
228e4fe8 | 1499 | |
9837850d | 1500 | uint16_t cachedResponseSize = dq.size; |
3e425868 | 1501 | uint32_t allowExpired = selectedBackend ? 0 : g_staleCacheEntriesTTL; |
9837850d | 1502 | |
4ab01344 RG |
1503 | if (dq.packetCache && !dq.skipCache) { |
1504 | dq.dnssecOK = (getEDNSZ(dq) & EDNS_HEADER_FLAG_DO); | |
1ef18cab | 1505 | } |
1506 | ||
3e425868 | 1507 | if (dq.useECS && ((selectedBackend && selectedBackend->useECS) || (!selectedBackend && serverPool->getECS()))) { |
389d903a | 1508 | // we special case our cache in case a downstream explicitly gave us a universally valid response with a 0 scope |
2d0cefab RG |
1509 | // we need ECS parsing (parseECS) to be true so we can be sure that the initial incoming query did not have an existing |
1510 | // ECS option, which would make it unsuitable for the zero-scope feature. | |
3e425868 | 1511 | if (dq.packetCache && !dq.skipCache && (!selectedBackend || !selectedBackend->disableZeroScope) && dq.packetCache->isECSParsingEnabled()) { |
4ab01344 | 1512 | if (dq.packetCache->get(dq, dq.consumed, dq.dh->id, reinterpret_cast<char*>(dq.dh), &cachedResponseSize, &dq.cacheKeyNoECS, dq.subnet, dq.dnssecOK, allowExpired)) { |
3e425868 RG |
1513 | dq.len = cachedResponseSize; |
1514 | ||
1515 | if (!prepareOutgoingResponse(holders, cs, dq, true)) { | |
1516 | return ProcessQueryResult::Drop; | |
1517 | } | |
1518 | ||
1519 | return ProcessQueryResult::SendAnswer; | |
389d903a RG |
1520 | } |
1521 | ||
4ab01344 | 1522 | if (!dq.subnet) { |
389d903a | 1523 | /* there was no existing ECS on the query, enable the zero-scope feature */ |
4ab01344 | 1524 | dq.useZeroScope = true; |
389d903a | 1525 | } |
9837850d | 1526 | } |
389d903a | 1527 | |
4ab01344 RG |
1528 | if (!handleEDNSClientSubnet(dq, &(dq.ednsAdded), &(dq.ecsAdded), g_preserveTrailingData)) { |
1529 | vinfolog("Dropping query from %s because we couldn't insert the ECS value", dq.remote->toStringWithPort()); | |
3e425868 | 1530 | return ProcessQueryResult::Drop; |
886e2cf2 | 1531 | } |
0beaa5c8 | 1532 | } |
886e2cf2 | 1533 | |
4ab01344 RG |
1534 | if (dq.packetCache && !dq.skipCache) { |
1535 | if (dq.packetCache->get(dq, dq.consumed, dq.dh->id, reinterpret_cast<char*>(dq.dh), &cachedResponseSize, &dq.cacheKey, dq.subnet, dq.dnssecOK, allowExpired)) { | |
3e425868 RG |
1536 | dq.len = cachedResponseSize; |
1537 | ||
1538 | if (!prepareOutgoingResponse(holders, cs, dq, true)) { | |
1539 | return ProcessQueryResult::Drop; | |
1540 | } | |
1541 | ||
1542 | return ProcessQueryResult::SendAnswer; | |
886e2cf2 | 1543 | } |
cb167afd | 1544 | ++g_stats.cacheMisses; |
0beaa5c8 | 1545 | } |
886e2cf2 | 1546 | |
3e425868 | 1547 | if(!selectedBackend) { |
cb167afd | 1548 | ++g_stats.noPolicy; |
26a3cdb7 | 1549 | |
348ef1c6 | 1550 | vinfolog("%s query for %s|%s from %s, no policy applied", g_servFailOnNoPolicy ? "ServFailed" : "Dropped", dq.qname->toLogString(), QType(dq.qtype).getName(), dq.remote->toStringWithPort()); |
3e425868 | 1551 | if (g_servFailOnNoPolicy) { |
4ab01344 | 1552 | restoreFlags(dq.dh, dq.origFlags); |
26a3cdb7 | 1553 | |
0beaa5c8 RG |
1554 | dq.dh->rcode = RCode::ServFail; |
1555 | dq.dh->qr = true; | |
26a3cdb7 | 1556 | |
3e425868 RG |
1557 | if (!prepareOutgoingResponse(holders, cs, dq, false)) { |
1558 | return ProcessQueryResult::Drop; | |
1559 | } | |
be6c318f | 1560 | // no response-only statistics counter to update. |
3e425868 | 1561 | return ProcessQueryResult::SendAnswer; |
1ea747c0 | 1562 | } |
3e425868 RG |
1563 | |
1564 | return ProcessQueryResult::Drop; | |
0beaa5c8 | 1565 | } |
1ea747c0 | 1566 | |
3e425868 RG |
1567 | if (dq.addXPF && selectedBackend->xpfRRCode != 0) { |
1568 | addXPF(dq, selectedBackend->xpfRRCode, g_preserveTrailingData); | |
5cc8371b RG |
1569 | } |
1570 | ||
3e425868 RG |
1571 | selectedBackend->queries++; |
1572 | return ProcessQueryResult::PassToBackend; | |
4ab01344 RG |
1573 | } |
1574 | catch(const std::exception& e){ | |
1575 | vinfolog("Got an error while parsing a %s query from %s, id %d: %s", (dq.tcp ? "TCP" : "UDP"), dq.remote->toStringWithPort(), queryId, e.what()); | |
4ab01344 | 1576 | } |
3e425868 | 1577 | return ProcessQueryResult::Drop; |
4ab01344 RG |
1578 | } |
1579 | ||
7bec330a | 1580 | static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest, char* query, uint16_t len, size_t queryBufferSize, struct mmsghdr* responsesVect, unsigned int* queuedResponses, struct iovec* respIOV, cmsgbuf_aligned* respCBuf) |
4ab01344 RG |
1581 | { |
1582 | assert(responsesVect == nullptr || (queuedResponses != nullptr && respIOV != nullptr && respCBuf != nullptr)); | |
1583 | uint16_t queryId = 0; | |
1584 | ||
1585 | try { | |
1586 | if (!isUDPQueryAcceptable(cs, holders, msgh, remote, dest)) { | |
1587 | return; | |
1588 | } | |
1589 | ||
1590 | /* we need an accurate ("real") value for the response and | |
1591 | to store into the IDS, but not for insertion into the | |
1592 | rings for example */ | |
1593 | struct timespec queryRealTime; | |
4ab01344 RG |
1594 | gettime(&queryRealTime, true); |
1595 | ||
1596 | std::shared_ptr<DNSCryptQuery> dnsCryptQuery = nullptr; | |
4ab01344 RG |
1597 | auto dnsCryptResponse = checkDNSCryptQuery(cs, query, len, dnsCryptQuery, queryRealTime.tv_sec, false); |
1598 | if (dnsCryptResponse) { | |
1599 | sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(dnsCryptResponse->data()), static_cast<uint16_t>(dnsCryptResponse->size()), 0, dest, remote); | |
1600 | return; | |
1601 | } | |
4ab01344 RG |
1602 | |
1603 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(query); | |
1604 | queryId = ntohs(dh->id); | |
1605 | ||
1606 | if (!checkQueryHeaders(dh)) { | |
1607 | return; | |
1608 | } | |
1609 | ||
1610 | uint16_t qtype, qclass; | |
1611 | unsigned int consumed = 0; | |
1612 | DNSName qname(query, len, sizeof(dnsheader), false, &qtype, &qclass, &consumed); | |
1613 | DNSQuestion dq(&qname, qtype, qclass, consumed, dest.sin4.sin_family != 0 ? &dest : &cs.local, &remote, dh, queryBufferSize, len, false, &queryRealTime); | |
1614 | dq.dnsCryptQuery = std::move(dnsCryptQuery); | |
3e425868 RG |
1615 | std::shared_ptr<DownstreamState> ss{nullptr}; |
1616 | auto result = processQuery(dq, cs, holders, ss); | |
4ab01344 | 1617 | |
3e425868 RG |
1618 | if (result == ProcessQueryResult::Drop) { |
1619 | return; | |
1620 | } | |
1621 | ||
1622 | if (result == ProcessQueryResult::SendAnswer) { | |
4ab01344 | 1623 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
3e425868 RG |
1624 | if (dq.delayMsec == 0 && responsesVect != nullptr) { |
1625 | queueResponse(cs, reinterpret_cast<char*>(dq.dh), dq.len, *dq.local, *dq.remote, responsesVect[*queuedResponses], respIOV, respCBuf); | |
4ab01344 RG |
1626 | (*queuedResponses)++; |
1627 | return; | |
1628 | } | |
1629 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
6d192f51 RG |
1630 | /* we use dest, always, because we don't want to use the listening address to send a response since it could be 0.0.0.0 */ |
1631 | sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(dq.dh), dq.len, dq.delayMsec, dest, *dq.remote); | |
3e425868 RG |
1632 | return; |
1633 | } | |
4ab01344 | 1634 | |
3e425868 | 1635 | if (result != ProcessQueryResult::PassToBackend || ss == nullptr) { |
4ab01344 RG |
1636 | return; |
1637 | } | |
11e1e08b | 1638 | |
0beaa5c8 RG |
1639 | unsigned int idOffset = (ss->idOffset++) % ss->idStates.size(); |
1640 | IDState* ids = &ss->idStates[idOffset]; | |
1641 | ids->age = 0; | |
0956c5c5 | 1642 | DOHUnit* du = nullptr; |
c9ba8478 | 1643 | |
9bd1a882 RG |
1644 | /* that means that the state was in use, possibly with an allocated |
1645 | DOHUnit that we will need to handle, but we can't touch it before | |
1646 | confirming that we now own this state */ | |
311f19d5 | 1647 | if (ids->isInUse()) { |
0956c5c5 RG |
1648 | du = ids->du; |
1649 | } | |
c9ba8478 | 1650 | |
a9489723 | 1651 | /* we atomically replace the value, we now own this state */ |
311f19d5 RG |
1652 | if (!ids->markAsUsed()) { |
1653 | /* the state was not in use. | |
9bd1a882 | 1654 | we reset 'du' because it might have still been in use when we read it. */ |
0956c5c5 | 1655 | du = nullptr; |
fbf14b03 | 1656 | ++ss->outstanding; |
71b86bd8 | 1657 | } |
0beaa5c8 | 1658 | else { |
9bd1a882 RG |
1659 | /* we are reusing a state, no change in outstanding but if there was an existing DOHUnit we need |
1660 | to handle it because it's about to be overwritten. */ | |
a9489723 | 1661 | ids->du = nullptr; |
fbf14b03 | 1662 | ++ss->reuseds; |
cb167afd | 1663 | ++g_stats.downstreamTimeouts; |
0956c5c5 | 1664 | handleDOHTimeout(du); |
0beaa5c8 | 1665 | } |
0e41337b | 1666 | |
0beaa5c8 | 1667 | ids->cs = &cs; |
a9489723 | 1668 | ids->origFD = cs.udpFD; |
0beaa5c8 | 1669 | ids->origID = dh->id; |
d0ae6360 | 1670 | setIDStateFromDNSQuestion(*ids, dq, std::move(qname)); |
0beaa5c8 RG |
1671 | |
1672 | /* If we couldn't harvest the real dest addr, still | |
1673 | write down the listening addr since it will be useful | |
1674 | (especially if it's not an 'any' one). | |
1675 | We need to keep track of which one it is since we may | |
1676 | want to use the real but not the listening addr to reply. | |
1677 | */ | |
1678 | if (dest.sin4.sin_family != 0) { | |
1679 | ids->origDest = dest; | |
1680 | ids->destHarvested = true; | |
1681 | } | |
1682 | else { | |
1683 | ids->origDest = cs.local; | |
1684 | ids->destHarvested = false; | |
1685 | } | |
7129b5c4 | 1686 | |
0beaa5c8 | 1687 | dh->id = idOffset; |
ca404e94 | 1688 | |
38069e7e | 1689 | int fd = pickBackendSocketForSending(ss); |
150105a2 | 1690 | ssize_t ret = udpClientSendRequestToBackend(ss, fd, query, dq.len); |
11e1e08b | 1691 | |
0beaa5c8 | 1692 | if(ret < 0) { |
fbf14b03 | 1693 | ++ss->sendErrors; |
cb167afd | 1694 | ++g_stats.downstreamSendErrors; |
0beaa5c8 | 1695 | } |
ca404e94 | 1696 | |
348ef1c6 | 1697 | vinfolog("Got query for %s|%s from %s, relayed to %s", ids->qname.toLogString(), QType(ids->qtype).getName(), remote.toStringWithPort(), ss->getName()); |
0beaa5c8 RG |
1698 | } |
1699 | catch(const std::exception& e){ | |
1700 | vinfolog("Got an error in UDP question thread while parsing a query from %s, id %d: %s", remote.toStringWithPort(), queryId, e.what()); | |
1701 | } | |
1702 | } | |
1703 | ||
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* UDP frontend loop used when several messages are received per system call:
   reads up to g_udpVectorSize datagrams with recvmmsg(), passes each of them
   to processUDPQuery() and sends the immediate answers that were queued in
   one sendmmsg() call. Never returns. */
static void MultipleMessagesUDPClientThread(ClientState* cs, LocalHolders& holders)
{
  /* everything needed to receive, and answer, one message */
  struct MMReceiver
  {
    char packet[s_maxPacketCacheEntrySize];
    ComboAddress remote;
    ComboAddress dest;
    struct iovec iov;
    /* used by HarvestDestinationAddress */
    cmsgbuf_aligned cbuf;
  };
  const size_t vectSize = g_udpVectorSize;
  /* the actual buffer is larger because:
     - we may have to add EDNS and/or ECS
     - we use it for self-generated responses (from rule or cache)
     but we only accept incoming payloads up to that size
  */
  static_assert(s_udpIncomingBufferSize <= sizeof(MMReceiver::packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)");

  auto slots = std::unique_ptr<MMReceiver[]>(new MMReceiver[vectSize]);
  auto incoming = std::unique_ptr<struct mmsghdr[]>(new struct mmsghdr[vectSize]);
  auto outgoing = std::unique_ptr<struct mmsghdr[]>(new struct mmsghdr[vectSize]);

  /* initialize the structures needed to receive our messages */
  for (size_t pos = 0; pos < vectSize; pos++) {
    auto& slot = slots[pos];
    slot.remote.sin4.sin_family = cs->local.sin4.sin_family;
    fillMSGHdr(&incoming[pos].msg_hdr, &slot.iov, &slot.cbuf, sizeof(slot.cbuf), slot.packet, s_udpIncomingBufferSize, &slot.remote);
  }

  /* go now */
  while (true) {

    /* reset the IO vector, since it's also used to send the vector of responses
       to avoid having to copy the data around */
    for (size_t pos = 0; pos < vectSize; pos++) {
      auto& slot = slots[pos];
      slot.iov.iov_base = slot.packet;
      slot.iov.iov_len = sizeof(slot.packet);
    }

    /* block until we have at least one message ready, but return
       as many as possible to save the syscall costs */
    int received = recvmmsg(cs->udpFD, incoming.get(), vectSize, MSG_WAITFORONE | MSG_TRUNC, nullptr);

    if (received <= 0) {
      vinfolog("Getting UDP messages via recvmmsg() failed with: %s", stringerror());
      continue;
    }

    /* number of answers queued by processUDPQuery() in 'outgoing' */
    unsigned int queued = 0;

    /* process the received messages */
    for (int pos = 0; pos < received; pos++) {
      const struct msghdr* msgh = &incoming[pos].msg_hdr;
      unsigned int got = incoming[pos].msg_len;
      const ComboAddress& remote = slots[pos].remote;

      if (static_cast<size_t>(got) < sizeof(struct dnsheader)) {
        /* too short to even hold a DNS header */
        ++g_stats.nonCompliantQueries;
        continue;
      }

      processUDPQuery(*cs, holders, msgh, remote, slots[pos].dest, slots[pos].packet, static_cast<uint16_t>(got), sizeof(slots[pos].packet), outgoing.get(), &queued, &slots[pos].iov, &slots[pos].cbuf);

    }

    /* immediate (not delayed or sent to a backend) responses (mostly from a rule, dynamic block
       or the cache) can be sent in batch too */

    if (queued > 0 && queued <= static_cast<unsigned int>(received)) {
      int sent = sendmmsg(cs->udpFD, outgoing.get(), queued, 0);

      if (sent < 0 || static_cast<unsigned int>(sent) != queued) {
        vinfolog("Error sending responses with sendmmsg() (%d on %u): %s", sent, queued, stringerror());
      }
    }

  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
1784 | ||
1785 | // listens to incoming queries, sends out to downstream servers, noting the intended return path | |
9b73b71c | 1786 | static void udpClientThread(ClientState* cs) |
0beaa5c8 RG |
1787 | try |
1788 | { | |
519f5484 | 1789 | setThreadName("dnsdist/udpClie"); |
0beaa5c8 RG |
1790 | LocalHolders holders; |
1791 | ||
1792 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) | |
1793 | if (g_udpVectorSize > 1) { | |
1794 | MultipleMessagesUDPClientThread(cs, holders); | |
1795 | ||
1796 | } | |
1797 | else | |
1798 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
1799 | { | |
8179b6d6 | 1800 | char packet[s_maxPacketCacheEntrySize]; |
0beaa5c8 RG |
1801 | /* the actual buffer is larger because: |
1802 | - we may have to add EDNS and/or ECS | |
1803 | - we use it for self-generated responses (from rule or cache) | |
1804 | but we only accept incoming payloads up to that size | |
1805 | */ | |
1806 | static_assert(s_udpIncomingBufferSize <= sizeof(packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)"); | |
1807 | struct msghdr msgh; | |
1808 | struct iovec iov; | |
1809 | /* used by HarvestDestinationAddress */ | |
7bec330a | 1810 | cmsgbuf_aligned cbuf; |
0beaa5c8 RG |
1811 | |
1812 | ComboAddress remote; | |
1813 | ComboAddress dest; | |
1814 | remote.sin4.sin_family = cs->local.sin4.sin_family; | |
be7dec02 | 1815 | fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), packet, s_udpIncomingBufferSize, &remote); |
0beaa5c8 RG |
1816 | |
1817 | for(;;) { | |
1818 | ssize_t got = recvmsg(cs->udpFD, &msgh, 0); | |
1819 | ||
1820 | if (got < 0 || static_cast<size_t>(got) < sizeof(struct dnsheader)) { | |
cb167afd | 1821 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1822 | continue; |
1823 | } | |
1824 | ||
be7dec02 | 1825 | processUDPQuery(*cs, holders, &msgh, remote, dest, packet, static_cast<uint16_t>(got), sizeof(packet), nullptr, nullptr, nullptr, nullptr); |
0beaa5c8 RG |
1826 | } |
1827 | } | |
24d5cb00 | 1828 | } |
2b3eefc3 | 1829 | catch(const std::exception &e) |
a4652d55 | 1830 | { |
1831 | errlog("UDP client thread died because of exception: %s", e.what()); | |
a4652d55 | 1832 | } |
2b3eefc3 | 1833 | catch(const PDNSException &e) |
a4652d55 | 1834 | { |
1835 | errlog("UDP client thread died because of PowerDNS exception: %s", e.reason); | |
a4652d55 | 1836 | } |
1837 | catch(...) | |
1838 | { | |
1839 | errlog("UDP client thread died because of an exception: %s", "unknown"); | |
a4652d55 | 1840 | } |
24d5cb00 | 1841 | |
/* Returns a random 16-bit DNS message ID, taken from libsodium when it is
   available and from random() otherwise. */
uint16_t getRandomDNSID()
{
  /* number of distinct 16-bit IDs */
#ifdef HAVE_LIBSODIUM
  const auto id = randombytes_uniform(65536);
#else
  const auto id = random() % 65536;
#endif
  return id;
}
1850 | ||
/* upper bound on the number of TCP client threads */
uint64_t g_maxTCPClientThreads = 10;
/* number of maintenance ticks between two packet cache cleanings */
std::atomic<uint16_t> g_cacheCleaningDelay{60};
/* percentage used by the maintenance thread to compute how many entries a cleaning may leave in a cache */
std::atomic<uint16_t> g_cacheCleaningPercentage{100};
e41f8165 | 1854 | |
9b73b71c | 1855 | void maintThread() |
886e2cf2 | 1856 | { |
519f5484 | 1857 | setThreadName("dnsdist/main"); |
886e2cf2 RG |
1858 | int interval = 1; |
1859 | size_t counter = 0; | |
5f3ea719 | 1860 | int32_t secondsToWaitLog = 0; |
886e2cf2 RG |
1861 | |
1862 | for(;;) { | |
1863 | sleep(interval); | |
1864 | ||
069e59db | 1865 | { |
1866 | std::lock_guard<std::mutex> lock(g_luamutex); | |
5f3ea719 PL |
1867 | auto f = g_lua.readVariable<boost::optional<std::function<void()> > >("maintenance"); |
1868 | if(f) { | |
1869 | try { | |
1870 | (*f)(); | |
1871 | secondsToWaitLog = 0; | |
1872 | } | |
1873 | catch(std::exception &e) { | |
1874 | if (secondsToWaitLog <= 0) { | |
1875 | infolog("Error during execution of maintenance function: %s", e.what()); | |
1876 | secondsToWaitLog = 61; | |
1877 | } | |
1878 | secondsToWaitLog -= interval; | |
1879 | } | |
1880 | } | |
069e59db | 1881 | } |
886e2cf2 RG |
1882 | |
1883 | counter++; | |
1884 | if (counter >= g_cacheCleaningDelay) { | |
c1b81381 RG |
1885 | /* keep track, for each cache, of whether we should keep |
1886 | expired entries */ | |
1887 | std::map<std::shared_ptr<DNSDistPacketCache>, bool> caches; | |
1888 | ||
1889 | /* gather all caches actually used by at least one pool, and see | |
1890 | if something prevents us from cleaning the expired entries */ | |
a9c2e4ab | 1891 | auto localPools = g_pools.getLocal(); |
a9c2e4ab | 1892 | for (const auto& entry : *localPools) { |
c1b81381 RG |
1893 | auto& pool = entry.second; |
1894 | ||
1895 | auto packetCache = pool->packetCache; | |
1896 | if (!packetCache) { | |
1897 | continue; | |
1898 | } | |
1899 | ||
1900 | auto pair = caches.insert({packetCache, false}); | |
1901 | auto& iter = pair.first; | |
1902 | /* if we need to keep stale data for this cache (ie, not clear | |
1903 | expired entries when at least one pool using this cache | |
1904 | has all its backends down) */ | |
1905 | if (packetCache->keepStaleData() && iter->second == false) { | |
1906 | /* so far all pools had at least one backend up */ | |
1907 | if (pool->countServers(true) == 0) { | |
1908 | iter->second = true; | |
1909 | } | |
886e2cf2 RG |
1910 | } |
1911 | } | |
c1b81381 RG |
1912 | |
1913 | for (auto pair : caches) { | |
1914 | /* shall we keep expired entries ? */ | |
1915 | if (pair.second == true) { | |
1916 | continue; | |
886e2cf2 | 1917 | } |
c1b81381 RG |
1918 | auto& packetCache = pair.first; |
1919 | size_t upTo = (packetCache->getMaxEntries()* (100 - g_cacheCleaningPercentage)) / 100; | |
1920 | packetCache->purgeExpired(upTo); | |
886e2cf2 RG |
1921 | } |
1922 | counter = 0; | |
1923 | } | |
1924 | ||
1925 | // ponder pruning g_dynblocks of expired entries here | |
1926 | } | |
886e2cf2 RG |
1927 | } |
1928 | ||
9b73b71c | 1929 | static void secPollThread() |
5d4e1ef8 RG |
1930 | { |
1931 | setThreadName("dnsdist/secpoll"); | |
1932 | ||
1933 | for (;;) { | |
1934 | try { | |
1935 | doSecPoll(g_secPollSuffix); | |
1936 | } | |
1937 | catch(...) { | |
1938 | } | |
1939 | sleep(g_secPollInterval); | |
1940 | } | |
1941 | } | |
1942 | ||
9b73b71c | 1943 | static void healthChecksThread() |
3ae86514 | 1944 | { |
519f5484 | 1945 | setThreadName("dnsdist/healthC"); |
77c9bc9a | 1946 | |
dd9c8246 | 1947 | static const int interval = 1; |
64e4ebb4 | 1948 | |
3ae86514 | 1949 | for(;;) { |
1950 | sleep(interval); | |
773470ca | 1951 | |
dd9c8246 | 1952 | if(g_tcpclientthreads->getQueuedCount() > 1 && !g_tcpclientthreads->hasReachedMaxThreads()) { |
a9bf3ec4 | 1953 | g_tcpclientthreads->addTCPClientThread(); |
dd9c8246 | 1954 | } |
3ae86514 | 1955 | |
dd9c8246 | 1956 | auto mplexer = std::shared_ptr<FDMultiplexer>(FDMultiplexer::getMultiplexerSilent()); |
a9c2e4ab RG |
1957 | auto states = g_dstates.getLocal(); // this points to the actual shared_ptrs! |
1958 | for(auto& dss : *states) { | |
dd9c8246 | 1959 | if(++dss->lastCheck < dss->checkInterval) { |
7c9bf18d | 1960 | continue; |
dd9c8246 | 1961 | } |
5d7e6765 | 1962 | |
dd9c8246 | 1963 | dss->lastCheck = 0; |
7565f4e6 | 1964 | |
dd9c8246 RG |
1965 | if (dss->availability == DownstreamState::Availability::Auto) { |
1966 | if (!queueHealthCheck(mplexer, dss)) { | |
1967 | updateHealthCheckResult(dss, false); | |
2993d58d | 1968 | } |
773470ca | 1969 | } |
b076b34a | 1970 | |
773470ca | 1971 | auto delta = dss->sw.udiffAndSet()/1000000.0; |
1972 | dss->queryLoad = 1.0*(dss->queries.load() - dss->prev.queries.load())/delta; | |
1973 | dss->dropRate = 1.0*(dss->reuseds.load() - dss->prev.reuseds.load())/delta; | |
3c115e0f | 1974 | dss->prev.queries.store(dss->queries.load()); |
773470ca | 1975 | dss->prev.reuseds.store(dss->reuseds.load()); |
3c115e0f | 1976 | |
dd9c8246 | 1977 | for (IDState& ids : dss->idStates) { // timeouts |
a9489723 | 1978 | int64_t usageIndicator = ids.usageIndicator; |
311f19d5 RG |
1979 | if(IDState::isInUse(usageIndicator) && ids.age++ > g_udpTimeout) { |
1980 | /* We mark the state as unused as soon as possible | |
51642fe3 RG |
1981 | to limit the risk of racing with the |
1982 | responder thread. | |
51642fe3 | 1983 | */ |
0956c5c5 RG |
1984 | auto oldDU = ids.du; |
1985 | ||
311f19d5 | 1986 | if (!ids.tryMarkUnused(usageIndicator)) { |
71b86bd8 RG |
1987 | /* this state has been altered in the meantime, |
1988 | don't go anywhere near it */ | |
1989 | continue; | |
1990 | } | |
fbf14b03 | 1991 | ids.du = nullptr; |
9bd1a882 | 1992 | handleDOHTimeout(oldDU); |
64e4ebb4 | 1993 | ids.age = 0; |
3c115e0f | 1994 | dss->reuseds++; |
1995 | --dss->outstanding; | |
cb167afd | 1996 | ++g_stats.downstreamTimeouts; // this is an 'actively' discovered timeout |
c08a5092 | 1997 | vinfolog("Had a downstream timeout from %s (%s) for query for %s|%s from %s", |
1998 | dss->remote.toStringWithPort(), dss->name, | |
348ef1c6 | 1999 | ids.qname.toLogString(), QType(ids.qtype).getName(), ids.origRemote.toStringWithPort()); |
51642fe3 | 2000 | |
c2fbeb27 | 2001 | struct timespec ts; |
85c7ca75 | 2002 | gettime(&ts); |
c2fbeb27 | 2003 | |
2d11d1b2 | 2004 | struct dnsheader fake; |
2005 | memset(&fake, 0, sizeof(fake)); | |
2006 | fake.id = ids.origID; | |
c2fbeb27 | 2007 | |
6d31c8b6 | 2008 | g_rings.insertResponse(ts, ids.origRemote, ids.qname, ids.qtype, std::numeric_limits<unsigned int>::max(), 0, fake, dss->remote); |
232f0877 | 2009 | } |
3ae86514 | 2010 | } |
2011 | } | |
dd9c8246 RG |
2012 | |
2013 | handleQueuedHealthChecks(mplexer); | |
3ae86514 | 2014 | } |
3ae86514 | 2015 | } |
2016 | ||
c2a42f97 | 2017 | static void bindAny(int af, int sock) |
2018 | { | |
18f8e493 | 2019 | __attribute__((unused)) int one = 1; |
c2a42f97 | 2020 | |
2021 | #ifdef IP_FREEBIND | |
2022 | if (setsockopt(sock, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) < 0) | |
a702a96c | 2023 | warnlog("Warning: IP_FREEBIND setsockopt failed: %s", stringerror()); |
c2a42f97 | 2024 | #endif |
2025 | ||
2026 | #ifdef IP_BINDANY | |
2027 | if (af == AF_INET) | |
2028 | if (setsockopt(sock, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) < 0) | |
a702a96c | 2029 | warnlog("Warning: IP_BINDANY setsockopt failed: %s", stringerror()); |
c2a42f97 | 2030 | #endif |
2031 | #ifdef IPV6_BINDANY | |
2032 | if (af == AF_INET6) | |
2033 | if (setsockopt(sock, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) < 0) | |
a702a96c | 2034 | warnlog("Warning: IPV6_BINDANY setsockopt failed: %s", stringerror()); |
c2a42f97 | 2035 | #endif |
2036 | #ifdef SO_BINDANY | |
2037 | if (setsockopt(sock, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) < 0) | |
a702a96c | 2038 | warnlog("Warning: SO_BINDANY setsockopt failed: %s", stringerror()); |
c2a42f97 | 2039 | #endif |
2040 | } | |
2041 | ||
a36ce055 RG |
2042 | static void dropGroupPrivs(gid_t gid) |
2043 | { | |
2044 | if (gid) { | |
2045 | if (setgid(gid) == 0) { | |
2046 | if (setgroups(0, NULL) < 0) { | |
a702a96c | 2047 | warnlog("Warning: Unable to drop supplementary gids: %s", stringerror()); |
a36ce055 RG |
2048 | } |
2049 | } | |
2050 | else { | |
a702a96c | 2051 | warnlog("Warning: Unable to set group ID to %d: %s", gid, stringerror()); |
a36ce055 RG |
2052 | } |
2053 | } | |
2054 | } | |
2055 | ||
2056 | static void dropUserPrivs(uid_t uid) | |
2057 | { | |
2058 | if(uid) { | |
2059 | if(setuid(uid) < 0) { | |
a702a96c | 2060 | warnlog("Warning: Unable to set user ID to %d: %s", uid, stringerror()); |
a36ce055 RG |
2061 | } |
2062 | } | |
2063 | } | |
2064 | ||
41408d3a RG |
2065 | static void checkFileDescriptorsLimits(size_t udpBindsCount, size_t tcpBindsCount) |
2066 | { | |
2067 | /* stdin, stdout, stderr */ | |
2068 | size_t requiredFDsCount = 3; | |
a9c2e4ab | 2069 | auto backends = g_dstates.getLocal(); |
cd73ceeb RG |
2070 | /* UDP sockets to backends */ |
2071 | size_t backendUDPSocketsCount = 0; | |
a9c2e4ab | 2072 | for (const auto& backend : *backends) { |
5bdbb83d | 2073 | backendUDPSocketsCount += backend->sockets.size(); |
cd73ceeb RG |
2074 | } |
2075 | requiredFDsCount += backendUDPSocketsCount; | |
2076 | /* TCP sockets to backends */ | |
a9c2e4ab | 2077 | requiredFDsCount += (backends->size() * g_maxTCPClientThreads); |
9fcd6adb | 2078 | /* listening sockets */ |
41408d3a RG |
2079 | requiredFDsCount += udpBindsCount; |
2080 | requiredFDsCount += tcpBindsCount; | |
2081 | /* max TCP connections currently served */ | |
2082 | requiredFDsCount += g_maxTCPClientThreads; | |
f2e29d04 | 2083 | /* max pipes for communicating between TCP acceptors and client threads */ |
41408d3a | 2084 | requiredFDsCount += (g_maxTCPClientThreads * 2); |
41408d3a | 2085 | /* max TCP queued connections */ |
9fcd6adb | 2086 | requiredFDsCount += g_maxTCPQueuedConnections; |
41408d3a RG |
2087 | /* DelayPipe pipe */ |
2088 | requiredFDsCount += 2; | |
2089 | /* syslog socket */ | |
2090 | requiredFDsCount++; | |
2091 | /* webserver main socket */ | |
2092 | requiredFDsCount++; | |
2093 | /* console main socket */ | |
2094 | requiredFDsCount++; | |
2095 | /* carbon export */ | |
2096 | requiredFDsCount++; | |
2097 | /* history file */ | |
2098 | requiredFDsCount++; | |
2099 | struct rlimit rl; | |
2100 | getrlimit(RLIMIT_NOFILE, &rl); | |
9fcd6adb | 2101 | if (rl.rlim_cur <= requiredFDsCount) { |
41408d3a RG |
2102 | warnlog("Warning, this configuration can use more than %d file descriptors, web server and console connections not included, and the current limit is %d.", std::to_string(requiredFDsCount), std::to_string(rl.rlim_cur)); |
2103 | #ifdef HAVE_SYSTEMD | |
2104 | warnlog("You can increase this value by using LimitNOFILE= in the systemd unit file or ulimit."); | |
2105 | #else | |
2106 | warnlog("You can increase this value by using ulimit."); | |
2107 | #endif | |
2108 | } | |
2109 | } | |
c2a42f97 | 2110 | |
6e9fd124 RG |
2111 | static void setUpLocalBind(std::unique_ptr<ClientState>& cs) |
2112 | { | |
2113 | /* skip some warnings if there is an identical UDP context */ | |
fbf14b03 | 2114 | bool warn = cs->tcp == false || cs->tlsFrontend != nullptr || cs->dohFrontend != nullptr; |
6e9fd124 RG |
2115 | int& fd = cs->tcp == false ? cs->udpFD : cs->tcpFD; |
2116 | (void) warn; | |
2117 | ||
2118 | fd = SSocket(cs->local.sin4.sin_family, cs->tcp == false ? SOCK_DGRAM : SOCK_STREAM, 0); | |
2119 | ||
2120 | if (cs->tcp) { | |
2121 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); | |
2122 | #ifdef TCP_DEFER_ACCEPT | |
2123 | SSetsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, 1); | |
2124 | #endif | |
2125 | if (cs->fastOpenQueueSize > 0) { | |
2126 | #ifdef TCP_FASTOPEN | |
2127 | SSetsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, cs->fastOpenQueueSize); | |
2128 | #else | |
2129 | if (warn) { | |
2130 | warnlog("TCP Fast Open has been configured on local address '%s' but is not supported", cs->local.toStringWithPort()); | |
2131 | } | |
2132 | #endif | |
2133 | } | |
2134 | } | |
2135 | ||
2136 | if(cs->local.sin4.sin_family == AF_INET6) { | |
2137 | SSetsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, 1); | |
2138 | } | |
2139 | ||
2140 | bindAny(cs->local.sin4.sin_family, fd); | |
2141 | ||
2142 | if(!cs->tcp && IsAnyAddress(cs->local)) { | |
2143 | int one=1; | |
2144 | setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems | |
2145 | #ifdef IPV6_RECVPKTINFO | |
2146 | setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); | |
2147 | #endif | |
2148 | } | |
2149 | ||
2150 | if (cs->reuseport) { | |
2151 | #ifdef SO_REUSEPORT | |
2152 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1); | |
2153 | #else | |
2154 | if (warn) { | |
2155 | /* no need to warn again if configured but support is not available, we already did for UDP */ | |
2156 | warnlog("SO_REUSEPORT has been configured on local address '%s' but is not supported", cs->local.toStringWithPort()); | |
2157 | } | |
2158 | #endif | |
2159 | } | |
2160 | ||
90f9fbc0 RG |
2161 | if (!cs->tcp) { |
2162 | if (cs->local.isIPv4()) { | |
2163 | try { | |
2164 | setSocketIgnorePMTU(cs->udpFD); | |
2165 | } | |
2166 | catch(const std::exception& e) { | |
2167 | warnlog("Failed to set IP_MTU_DISCOVER on UDP server socket for local address '%s': %s", cs->local.toStringWithPort(), e.what()); | |
2168 | } | |
2169 | } | |
2170 | } | |
2171 | ||
6e9fd124 RG |
2172 | const std::string& itf = cs->interface; |
2173 | if (!itf.empty()) { | |
2174 | #ifdef SO_BINDTODEVICE | |
2175 | int res = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, itf.c_str(), itf.length()); | |
2176 | if (res != 0) { | |
a702a96c | 2177 | warnlog("Error setting up the interface on local address '%s': %s", cs->local.toStringWithPort(), stringerror()); |
6e9fd124 RG |
2178 | } |
2179 | #else | |
2180 | if (warn) { | |
2181 | warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", cs->local.toStringWithPort()); | |
2182 | } | |
2183 | #endif | |
2184 | } | |
2185 | ||
2186 | #ifdef HAVE_EBPF | |
2187 | if (g_defaultBPFFilter) { | |
2188 | cs->attachFilter(g_defaultBPFFilter); | |
2189 | vinfolog("Attaching default BPF Filter to %s frontend %s", (!cs->tcp ? "UDP" : "TCP"), cs->local.toStringWithPort()); | |
2190 | } | |
2191 | #endif /* HAVE_EBPF */ | |
2192 | ||
2193 | if (cs->tlsFrontend != nullptr) { | |
2194 | if (!cs->tlsFrontend->setupTLS()) { | |
2195 | errlog("Error while setting up TLS on local address '%s', exiting", cs->local.toStringWithPort()); | |
2196 | _exit(EXIT_FAILURE); | |
2197 | } | |
2198 | } | |
2199 | ||
fbf14b03 RG |
2200 | if (cs->dohFrontend != nullptr) { |
2201 | cs->dohFrontend->setup(); | |
2202 | } | |
2203 | ||
6e9fd124 RG |
2204 | SBind(fd, cs->local); |
2205 | ||
2206 | if (cs->tcp) { | |
fbf14b03 | 2207 | SListen(cs->tcpFD, SOMAXCONN); |
6e9fd124 RG |
2208 | if (cs->tlsFrontend != nullptr) { |
2209 | warnlog("Listening on %s for TLS", cs->local.toStringWithPort()); | |
2210 | } | |
fbf14b03 RG |
2211 | else if (cs->dohFrontend != nullptr) { |
2212 | warnlog("Listening on %s for DoH", cs->local.toStringWithPort()); | |
2213 | } | |
6e9fd124 RG |
2214 | else if (cs->dnscryptCtx != nullptr) { |
2215 | warnlog("Listening on %s for DNSCrypt", cs->local.toStringWithPort()); | |
2216 | } | |
2217 | else { | |
2218 | warnlog("Listening on %s", cs->local.toStringWithPort()); | |
2219 | } | |
2220 | } | |
2221 | ||
2222 | cs->ready = true; | |
2223 | } | |
2224 | ||
/* Values collected from the command line by main(). Every field starts out
   empty / false and is only changed when the matching option is seen. */
struct
{
  std::vector<std::string> locals;   // addresses given with -l / --local
  std::vector<std::string> remotes;  // positional arguments, used as downstream servers when not in client mode
  bool checkConfig = false;          // --check-config
  bool beClient = false;             // -c / --client
  bool beSupervised = false;         // --supervised
  std::string command;               // -e / --execute
  std::string config;                // -C / --config (main() stores a default path before parsing)
  std::string uid;                   // -u / --uid
  std::string gid;                   // -g / --gid
} g_cmdLine;
520eb5a0 | 2237 | |
/* Starts out false; main() sets it to true once the list of frontends is
   final, right before the listening sockets are set up. */
std::atomic<bool> g_configurationDone(false);
520eb5a0 | 2239 | |
7d3ee2bb PL |
/* Prints the command-line help on standard output.
   The -k / --setkey entry is only shown when built with libsodium. */
static void usage()
{
  /* leading blank line; std::endl also flushes the stream */
  std::cout << std::endl;

  /* adjacent string literals are concatenated by the compiler, so the whole
     help text goes out through a single insertion */
  std::cout <<
    "Syntax: dnsdist [-C,--config file] [-c,--client [IP[:PORT]]]\n"
    "[-e,--execute cmd] [-h,--help] [-l,--local addr]\n"
    "[-v,--verbose] [--check-config] [--version]\n"
    "\n"
    "-a,--acl netmask Add this netmask to the ACL\n"
    "-C,--config file Load configuration from 'file'\n"
    "-c,--client Operate as a client, connect to dnsdist. This reads\n"
    " controlSocket from your configuration file, but also\n"
    " accepts an IP:PORT argument\n"
#ifdef HAVE_LIBSODIUM
    "-k,--setkey KEY Use KEY for encrypted communication to dnsdist. This\n"
    " is similar to setting setKey in the configuration file.\n"
    " NOTE: this will leak this key in your shell's history\n"
    " and in the systems running process list.\n"
#endif
    "--check-config Validate the configuration file and exit. The exit-code\n"
    " reflects the validation, 0 is OK, 1 means an error.\n"
    " Any errors are printed as well.\n"
    "-e,--execute cmd Connect to dnsdist and execute 'cmd'\n"
    "-g,--gid gid Change the process group ID after binding sockets\n"
    "-h,--help Display this helpful message\n"
    "-l,--local address Listen on this local address\n"
    "--supervised Don't open a console, I'm supervised\n"
    " (use with e.g. systemd and daemontools)\n"
    "--disable-syslog Don't log to syslog, only to stdout\n"
    " (use with e.g. systemd)\n"
    "-u,--uid uid Change the process user ID after binding sockets\n"
    "-v,--verbose Enable verbose mode\n"
    "-V,--version Show dnsdist version information and exit\n";
}
2273 | ||
24d5cb00 | 2274 | int main(int argc, char** argv) |
2275 | try | |
2276 | { | |
41408d3a RG |
2277 | size_t udpBindsCount = 0; |
2278 | size_t tcpBindsCount = 0; | |
94721140 | 2279 | rl_attempted_completion_function = my_completion; |
2280 | rl_completion_append_character = 0; | |
2281 | ||
726ddf60 | 2282 | signal(SIGPIPE, SIG_IGN); |
6d01c80c | 2283 | signal(SIGCHLD, SIG_IGN); |
0ca6a67f | 2284 | openlog("dnsdist", LOG_PID|LOG_NDELAY, LOG_DAEMON); |
6d01c80c | 2285 | |
0b62ec78 | 2286 | #ifdef HAVE_LIBSODIUM |
6d01c80c | 2287 | if (sodium_init() == -1) { |
2288 | cerr<<"Unable to initialize crypto library"<<endl; | |
2289 | exit(EXIT_FAILURE); | |
2290 | } | |
7691e7df | 2291 | g_hashperturb=randombytes_uniform(0xffffffff); |
2292 | srandom(randombytes_uniform(0xffffffff)); | |
2293 | #else | |
2294 | { | |
2295 | struct timeval tv; | |
2296 | gettimeofday(&tv, 0); | |
2297 | srandom(tv.tv_sec ^ tv.tv_usec ^ getpid()); | |
2298 | g_hashperturb=random(); | |
2299 | } | |
2300 | ||
0b62ec78 | 2301 | #endif |
094b6aff | 2302 | ComboAddress clientAddress = ComboAddress(); |
11058bd6 | 2303 | g_cmdLine.config=SYSCONFDIR "/dnsdist.conf"; |
359bdba5 | 2304 | struct option longopts[]={ |
8f2d5ec3 | 2305 | {"acl", required_argument, 0, 'a'}, |
359bdba5 CH |
2306 | {"check-config", no_argument, 0, 1}, |
2307 | {"client", no_argument, 0, 'c'}, | |
7cc68f53 | 2308 | {"config", required_argument, 0, 'C'}, |
359bdba5 | 2309 | {"disable-syslog", no_argument, 0, 2}, |
7cc68f53 | 2310 | {"execute", required_argument, 0, 'e'}, |
359bdba5 CH |
2311 | {"gid", required_argument, 0, 'g'}, |
2312 | {"help", no_argument, 0, 'h'}, | |
2313 | {"local", required_argument, 0, 'l'}, | |
359bdba5 | 2314 | {"setkey", required_argument, 0, 'k'}, |
359bdba5 CH |
2315 | {"supervised", no_argument, 0, 3}, |
2316 | {"uid", required_argument, 0, 'u'}, | |
2317 | {"verbose", no_argument, 0, 'v'}, | |
2318 | {"version", no_argument, 0, 'V'}, | |
2319 | {0,0,0,0} | |
7cc68f53 | 2320 | }; |
2321 | int longindex=0; | |
8f2d5ec3 | 2322 | string optstring; |
7cc68f53 | 2323 | for(;;) { |
359bdba5 | 2324 | int c=getopt_long(argc, argv, "a:cC:e:g:hk:l:u:vV", longopts, &longindex); |
7cc68f53 | 2325 | if(c==-1) |
2326 | break; | |
2327 | switch(c) { | |
5efcfa63 PL |
2328 | case 1: |
2329 | g_cmdLine.checkConfig=true; | |
2330 | break; | |
bbfaaa6f PL |
2331 | case 2: |
2332 | g_syslog=false; | |
2333 | break; | |
b7165327 CH |
2334 | case 3: |
2335 | g_cmdLine.beSupervised=true; | |
2336 | break; | |
7cc68f53 | 2337 | case 'C': |
2338 | g_cmdLine.config=optarg; | |
2339 | break; | |
2340 | case 'c': | |
2341 | g_cmdLine.beClient=true; | |
2342 | break; | |
7cc68f53 | 2343 | case 'e': |
2344 | g_cmdLine.command=optarg; | |
2345 | break; | |
a36ce055 RG |
2346 | case 'g': |
2347 | g_cmdLine.gid=optarg; | |
2348 | break; | |
7cc68f53 | 2349 | case 'h': |
6306c282 | 2350 | cout<<"dnsdist "<<VERSION<<endl; |
7d3ee2bb | 2351 | usage(); |
7cc68f53 | 2352 | cout<<"\n"; |
2353 | exit(EXIT_SUCCESS); | |
2354 | break; | |
8f2d5ec3 | 2355 | case 'a': |
2356 | optstring=optarg; | |
2357 | g_ACL.modify([optstring](NetmaskGroup& nmg) { nmg.addMask(optstring); }); | |
2358 | break; | |
ddb14ec9 | 2359 | case 'k': |
b4b5edbd | 2360 | #ifdef HAVE_LIBSODIUM |
b5521206 | 2361 | if (B64Decode(string(optarg), g_consoleKey) < 0) { |
ddb14ec9 PL |
2362 | cerr<<"Unable to decode key '"<<optarg<<"'."<<endl; |
2363 | exit(EXIT_FAILURE); | |
2364 | } | |
b4b5edbd CH |
2365 | #else |
2366 | cerr<<"dnsdist has been built without libsodium, -k/--setkey is unsupported."<<endl; | |
2367 | exit(EXIT_FAILURE); | |
ddb14ec9 | 2368 | #endif |
b4b5edbd | 2369 | break; |
7cc68f53 | 2370 | case 'l': |
6a363878 | 2371 | g_cmdLine.locals.push_back(trim_copy(string(optarg))); |
7cc68f53 | 2372 | break; |
a36ce055 RG |
2373 | case 'u': |
2374 | g_cmdLine.uid=optarg; | |
2375 | break; | |
7cc68f53 | 2376 | case 'v': |
2377 | g_verbose=true; | |
2378 | break; | |
6306c282 | 2379 | case 'V': |
d4d796e5 PD |
2380 | #ifdef LUAJIT_VERSION |
2381 | cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<" ["<<LUAJIT_VERSION<<"])"<<endl; | |
2382 | #else | |
2383 | cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<")"<<endl; | |
2384 | #endif | |
70829d97 | 2385 | cout<<"Enabled features: "; |
90fe8ae6 RG |
2386 | #ifdef HAVE_CDB |
2387 | cout<<"cdb "; | |
2388 | #endif | |
a227f47d RG |
2389 | #ifdef HAVE_DNS_OVER_TLS |
2390 | cout<<"dns-over-tls("; | |
2391 | #ifdef HAVE_GNUTLS | |
3909bf10 CH |
2392 | cout<<"gnutls"; |
2393 | #ifdef HAVE_LIBSSL | |
2394 | cout<<" "; | |
2395 | #endif | |
a227f47d RG |
2396 | #endif |
2397 | #ifdef HAVE_LIBSSL | |
2398 | cout<<"openssl"; | |
2399 | #endif | |
2400 | cout<<") "; | |
2401 | #endif | |
fbf14b03 RG |
2402 | #ifdef HAVE_DNS_OVER_HTTPS |
2403 | cout<<"dns-over-https(DOH) "; | |
2404 | #endif | |
70829d97 PL |
2405 | #ifdef HAVE_DNSCRYPT |
2406 | cout<<"dnscrypt "; | |
2407 | #endif | |
0beaa5c8 RG |
2408 | #ifdef HAVE_EBPF |
2409 | cout<<"ebpf "; | |
2410 | #endif | |
82a91ddf CH |
2411 | #ifdef HAVE_FSTRM |
2412 | cout<<"fstrm "; | |
2413 | #endif | |
f4b1f1fd RG |
2414 | #ifdef HAVE_LIBCRYPTO |
2415 | cout<<"ipcipher "; | |
2416 | #endif | |
3909bf10 CH |
2417 | #ifdef HAVE_LIBSODIUM |
2418 | cout<<"libsodium "; | |
2419 | #endif | |
f441962a RG |
2420 | #ifdef HAVE_LMDB |
2421 | cout<<"lmdb "; | |
2422 | #endif | |
70829d97 PL |
2423 | #ifdef HAVE_PROTOBUF |
2424 | cout<<"protobuf "; | |
2425 | #endif | |
2426 | #ifdef HAVE_RE2 | |
2427 | cout<<"re2 "; | |
2428 | #endif | |
0beaa5c8 RG |
2429 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
2430 | cout<<"recvmmsg/sendmmsg "; | |
2431 | #endif | |
2432 | #ifdef HAVE_NET_SNMP | |
2433 | cout<<"snmp "; | |
2434 | #endif | |
70829d97 PL |
2435 | #ifdef HAVE_SYSTEMD |
2436 | cout<<"systemd"; | |
2437 | #endif | |
2438 | cout<<endl; | |
6306c282 PL |
2439 | exit(EXIT_SUCCESS); |
2440 | break; | |
7d3ee2bb PL |
2441 | case '?': |
2442 | //getopt_long printed an error message. | |
2443 | usage(); | |
2444 | exit(EXIT_FAILURE); | |
2445 | break; | |
7cc68f53 | 2446 | } |
24d5cb00 | 2447 | } |
6ab65223 | 2448 | |
7cc68f53 | 2449 | argc-=optind; |
2450 | argv+=optind; | |
2451 | for(auto p = argv; *p; ++p) { | |
094b6aff PL |
2452 | if(g_cmdLine.beClient) { |
2453 | clientAddress = ComboAddress(*p, 5199); | |
2454 | } else { | |
2455 | g_cmdLine.remotes.push_back(*p); | |
2456 | } | |
7cc68f53 | 2457 | } |
2458 | ||
a1b1a29d | 2459 | ServerPolicy leastOutstandingPol{"leastOutstanding", leastOutstanding, false}; |
cd29dcb1 | 2460 | |
e5a14b2b | 2461 | g_policy.setState(leastOutstandingPol); |
7cc68f53 | 2462 | if(g_cmdLine.beClient || !g_cmdLine.command.empty()) { |
203b5348 | 2463 | setupLua(true, false, g_cmdLine.config); |
094b6aff PL |
2464 | if (clientAddress != ComboAddress()) |
2465 | g_serverControl = clientAddress; | |
7cc68f53 | 2466 | doClient(g_serverControl, g_cmdLine.command); |
e16fd59c | 2467 | _exit(EXIT_SUCCESS); |
6d01c80c | 2468 | } |
2e72cc0e | 2469 | |
8f133915 | 2470 | auto acl = g_ACL.getCopy(); |
8f2d5ec3 | 2471 | if(acl.empty()) { |
2472 | for(auto& addr : {"127.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "169.254.0.0/16", "192.168.0.0/16", "172.16.0.0/12", "::1/128", "fc00::/7", "fe80::/10"}) | |
2473 | acl.addMask(addr); | |
2474 | g_ACL.setState(acl); | |
2475 | } | |
8f133915 | 2476 | |
b5521206 | 2477 | auto consoleACL = g_consoleACL.getCopy(); |
5ceea33e RG |
2478 | for (const auto& mask : { "127.0.0.1/8", "::1/128" }) { |
2479 | consoleACL.addMask(mask); | |
2480 | } | |
b5521206 RG |
2481 | g_consoleACL.setState(consoleACL); |
2482 | ||
5efcfa63 | 2483 | if (g_cmdLine.checkConfig) { |
203b5348 | 2484 | setupLua(false, true, g_cmdLine.config); |
5efcfa63 PL |
2485 | // No exception was thrown |
2486 | infolog("Configuration '%s' OK!", g_cmdLine.config); | |
d8c19b98 | 2487 | _exit(EXIT_SUCCESS); |
5efcfa63 PL |
2488 | } |
2489 | ||
203b5348 | 2490 | auto todo=setupLua(false, false, g_cmdLine.config); |
2e72cc0e | 2491 | |
636cc544 CHB |
2492 | auto localPools = g_pools.getCopy(); |
2493 | { | |
2494 | bool precompute = false; | |
2495 | if (g_policy.getLocal()->name == "chashed") { | |
2496 | precompute = true; | |
2497 | } else { | |
2498 | for (const auto& entry: localPools) { | |
2499 | if (entry.second->policy != nullptr && entry.second->policy->name == "chashed") { | |
2500 | precompute = true; | |
2501 | break ; | |
2502 | } | |
2503 | } | |
2504 | } | |
2505 | if (precompute) { | |
2506 | vinfolog("Pre-computing hashes for consistent hash load-balancing policy"); | |
2507 | // pre compute hashes | |
2508 | auto backends = g_dstates.getLocal(); | |
2509 | for (auto& backend: *backends) { | |
2510 | backend->hash(); | |
2511 | } | |
d58e616a CHB |
2512 | } |
2513 | } | |
2514 | ||
6e9fd124 RG |
2515 | if (!g_cmdLine.locals.empty()) { |
2516 | for (auto it = g_frontends.begin(); it != g_frontends.end(); ) { | |
fbf14b03 RG |
2517 | /* DoH, DoT and DNSCrypt frontends are separate */ |
2518 | if ((*it)->dohFrontend == nullptr && (*it)->tlsFrontend == nullptr && (*it)->dnscryptCtx == nullptr) { | |
6e9fd124 | 2519 | it = g_frontends.erase(it); |
9f67b883 | 2520 | } |
6e9fd124 RG |
2521 | else { |
2522 | ++it; | |
9f67b883 | 2523 | } |
9f67b883 RG |
2524 | } |
2525 | ||
6e9fd124 RG |
2526 | for(const auto& loc : g_cmdLine.locals) { |
2527 | /* UDP */ | |
2528 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress(loc, 53), false, false, 0, "", {}))); | |
2529 | /* TCP */ | |
2530 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress(loc, 53), true, false, 0, "", {}))); | |
87b515ed | 2531 | } |
a36ce055 RG |
2532 | } |
2533 | ||
6e9fd124 RG |
2534 | if (g_frontends.empty()) { |
2535 | /* UDP */ | |
2536 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress("127.0.0.1", 53), false, false, 0, "", {}))); | |
2537 | /* TCP */ | |
2538 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress("127.0.0.1", 53), true, false, 0, "", {}))); | |
11e1e08b | 2539 | } |
11e1e08b | 2540 | |
6e9fd124 | 2541 | g_configurationDone = true; |
a227f47d | 2542 | |
6e9fd124 RG |
2543 | for(auto& frontend : g_frontends) { |
2544 | setUpLocalBind(frontend); | |
a227f47d | 2545 | |
6e9fd124 RG |
2546 | if (frontend->tcp == false) { |
2547 | ++udpBindsCount; | |
a227f47d RG |
2548 | } |
2549 | else { | |
6e9fd124 | 2550 | ++tcpBindsCount; |
a227f47d RG |
2551 | } |
2552 | } | |
2553 | ||
b82a127e | 2554 | warnlog("dnsdist %s comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it according to the terms of the GPL version 2", VERSION); |
9c9b4998 | 2555 | |
b82a127e RG |
2556 | vector<string> vec; |
2557 | std::string acls; | |
2558 | g_ACL.getLocal()->toStringVector(&vec); | |
2559 | for(const auto& s : vec) { | |
2560 | if (!acls.empty()) | |
2561 | acls += ", "; | |
2562 | acls += s; | |
b076b34a | 2563 | } |
b82a127e | 2564 | infolog("ACL allowing queries from: %s", acls.c_str()); |
b5521206 RG |
2565 | vec.clear(); |
2566 | acls.clear(); | |
2567 | g_consoleACL.getLocal()->toStringVector(&vec); | |
2568 | for (const auto& entry : vec) { | |
2569 | if (!acls.empty()) { | |
2570 | acls += ", "; | |
2571 | } | |
2572 | acls += entry; | |
2573 | } | |
2574 | infolog("Console ACL allowing connections from: %s", acls.c_str()); | |
6d01c80c | 2575 | |
9c9b4998 RG |
2576 | #ifdef HAVE_LIBSODIUM |
2577 | if (g_consoleEnabled && g_consoleKey.empty()) { | |
2578 | warnlog("Warning, the console has been enabled via 'controlSocket()' but no key has been set with 'setKey()' so all connections will fail until a key has been set"); | |
2579 | } | |
2580 | #endif | |
2581 | ||
aac59883 RG |
2582 | uid_t newgid=0; |
2583 | gid_t newuid=0; | |
2584 | ||
2585 | if(!g_cmdLine.gid.empty()) | |
2586 | newgid = strToGID(g_cmdLine.gid.c_str()); | |
2587 | ||
2588 | if(!g_cmdLine.uid.empty()) | |
2589 | newuid = strToUID(g_cmdLine.uid.c_str()); | |
2590 | ||
2591 | dropGroupPrivs(newgid); | |
2592 | dropUserPrivs(newuid); | |
fdc3ea42 RG |
2593 | try { |
2594 | /* we might still have capabilities remaining, | |
2595 | for example if we have been started as root | |
2596 | without --uid or --gid (please don't do that) | |
2597 | or as an unprivileged user with ambient | |
2598 | capabilities like CAP_NET_BIND_SERVICE. | |
2599 | */ | |
83fe2c55 | 2600 | dropCapabilities(g_capabilitiesToRetain); |
fdc3ea42 RG |
2601 | } |
2602 | catch(const std::exception& e) { | |
2603 | warnlog("%s", e.what()); | |
2604 | } | |
aac59883 | 2605 | |
a36ce055 RG |
2606 | /* this need to be done _after_ dropping privileges */ |
2607 | g_delay = new DelayPipe<DelayedPacket>(); | |
2608 | ||
9f4eb5cc RG |
2609 | if (g_snmpAgent) { |
2610 | g_snmpAgent->run(); | |
2611 | } | |
2612 | ||
1f7646c2 | 2613 | g_tcpclientthreads = std::unique_ptr<TCPClientCollection>(new TCPClientCollection(g_maxTCPClientThreads, g_useTCPSinglePipe)); |
a9bf3ec4 | 2614 | |
2e72cc0e | 2615 | for(auto& t : todo) |
2616 | t(); | |
2617 | ||
636cc544 | 2618 | localPools = g_pools.getCopy(); |
8f4f5186 RG |
2619 | /* create the default pool no matter what */ |
2620 | createPoolIfNotExists(localPools, ""); | |
7cc68f53 | 2621 | if(g_cmdLine.remotes.size()) { |
2622 | for(const auto& address : g_cmdLine.remotes) { | |
c9262563 | 2623 | auto ret=std::make_shared<DownstreamState>(ComboAddress(address, 53)); |
886e2cf2 | 2624 | addServerToPool(localPools, "", ret); |
5d7e6765 | 2625 | if (ret->connected && !ret->threadStarted.test_and_set()) { |
2717a92f | 2626 | ret->tid = thread(responderThread, ret); |
7565f4e6 | 2627 | } |
ecbe9133 | 2628 | g_dstates.modify([ret](servers_t& servers) { servers.push_back(ret); }); |
6d01c80c | 2629 | } |
2630 | } | |
886e2cf2 | 2631 | g_pools.setState(localPools); |
6d01c80c | 2632 | |
a9c2e4ab | 2633 | if(g_dstates.getLocal()->empty()) { |
e73ec7d3 | 2634 | errlog("No downstream servers defined: all packets will get dropped"); |
2635 | // you might define them later, but you need to know | |
2636 | } | |
2637 | ||
41408d3a RG |
2638 | checkFileDescriptorsLimits(udpBindsCount, tcpBindsCount); |
2639 | ||
dd9c8246 | 2640 | auto mplexer = std::shared_ptr<FDMultiplexer>(FDMultiplexer::getMultiplexerSilent()); |
e5a14b2b | 2641 | for(auto& dss : g_dstates.getCopy()) { // it is a copy, but the internal shared_ptrs are the real deal |
dd9c8246 RG |
2642 | if (dss->availability == DownstreamState::Availability::Auto) { |
2643 | if (!queueHealthCheck(mplexer, dss, true)) { | |
2644 | dss->upStatus = false; | |
2645 | warnlog("Marking downstream %s as 'down'", dss->getNameWithAddr()); | |
2646 | } | |
773470ca | 2647 | } |
2648 | } | |
dd9c8246 | 2649 | handleQueuedHealthChecks(mplexer, true); |
b076b34a | 2650 | |
6e9fd124 | 2651 | for(auto& cs : g_frontends) { |
fbf14b03 RG |
2652 | if (cs->dohFrontend != nullptr) { |
2653 | #ifdef HAVE_DNS_OVER_HTTPS | |
2654 | std::thread t1(dohThread, cs.get()); | |
2b0cb8f8 RG |
2655 | if (!cs->cpus.empty()) { |
2656 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2657 | } | |
fbf14b03 RG |
2658 | t1.detach(); |
2659 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
2660 | continue; | |
2661 | } | |
a36ce055 | 2662 | if (cs->udpFD >= 0) { |
6e9fd124 | 2663 | thread t1(udpClientThread, cs.get()); |
f0e4dcba RG |
2664 | if (!cs->cpus.empty()) { |
2665 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2666 | } | |
a36ce055 | 2667 | t1.detach(); |
652a7355 | 2668 | } |
a36ce055 | 2669 | else if (cs->tcpFD >= 0) { |
6e9fd124 | 2670 | thread t1(tcpAcceptorThread, cs.get()); |
f0e4dcba RG |
2671 | if (!cs->cpus.empty()) { |
2672 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2673 | } | |
a36ce055 | 2674 | t1.detach(); |
726ddf60 | 2675 | } |
24d5cb00 | 2676 | } |
7730131a | 2677 | |
42fae326 | 2678 | thread carbonthread(carbonDumpThread); |
2679 | carbonthread.detach(); | |
2680 | ||
3c115e0f | 2681 | thread stattid(maintThread); |
886e2cf2 | 2682 | stattid.detach(); |
6d01c80c | 2683 | |
886e2cf2 RG |
2684 | thread healththread(healthChecksThread); |
2685 | ||
5d4e1ef8 RG |
2686 | if (!g_secPollSuffix.empty()) { |
2687 | thread secpollthread(secPollThread); | |
2688 | secpollthread.detach(); | |
2689 | } | |
2690 | ||
b82a127e | 2691 | if(g_cmdLine.beSupervised) { |
6ab65223 PL |
2692 | #ifdef HAVE_SYSTEMD |
2693 | sd_notify(0, "READY=1"); | |
2694 | #endif | |
886e2cf2 | 2695 | healththread.join(); |
773470ca | 2696 | } |
6d01c80c | 2697 | else { |
886e2cf2 | 2698 | healththread.detach(); |
505ca3d1 | 2699 | doConsole(); |
3c115e0f | 2700 | } |
9cf811d1 | 2701 | _exit(EXIT_SUCCESS); |
3c115e0f | 2702 | |
6d01c80c | 2703 | } |
3f5c3f1d PD |
2704 | catch(const LuaContext::ExecutionErrorException& e) { |
2705 | try { | |
2706 | errlog("Fatal Lua error: %s", e.what()); | |
2707 | std::rethrow_if_nested(e); | |
2010ac95 RG |
2708 | } catch(const std::exception& ne) { |
2709 | errlog("Details: %s", ne.what()); | |
3f5c3f1d PD |
2710 | } |
2711 | catch(PDNSException &ae) | |
2712 | { | |
2713 | errlog("Fatal pdns error: %s", ae.reason); | |
2714 | } | |
2715 | _exit(EXIT_FAILURE); | |
2716 | } | |
24d5cb00 | 2717 | catch(std::exception &e) |
2718 | { | |
6d01c80c | 2719 | errlog("Fatal error: %s", e.what()); |
4a966472 | 2720 | _exit(EXIT_FAILURE); |
24d5cb00 | 2721 | } |
3f81d239 | 2722 | catch(PDNSException &ae) |
7730131a | 2723 | { |
6d01c80c | 2724 | errlog("Fatal pdns error: %s", ae.reason); |
4a966472 | 2725 | _exit(EXIT_FAILURE); |
7730131a | 2726 | } |
eb0335ff MC |
2727 | |
2728 | uint64_t getLatencyCount(const std::string&) | |
2729 | { | |
2730 | return g_stats.responses + g_stats.selfAnswered + g_stats.cacheHits; | |
2731 | } |