]>
Commit | Line | Data |
---|---|---|
24d5cb00 | 1 | /* |
12471842 PL |
2 | * This file is part of PowerDNS or dnsdist. |
3 | * Copyright -- PowerDNS.COM B.V. and its contributors | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of version 2 of the GNU General Public License as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * In addition, for the avoidance of any doubt, permission is granted to | |
10 | * link this program with OpenSSL and to (re)distribute the binaries | |
11 | * produced as the result of such linking. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
5cc8371b RG |
22 | |
23 | #include "config.h" | |
24 | ||
25 | #include <fstream> | |
26 | #include <getopt.h> | |
27 | #include <grp.h> | |
2c5db3f7 | 28 | #include <limits> |
5cc8371b RG |
29 | #include <netinet/tcp.h> |
30 | #include <pwd.h> | |
31 | #include <sys/resource.h> | |
32 | #include <unistd.h> | |
424bdfb1 | 33 | |
4d39d7f3 | 34 | #if defined (__OpenBSD__) || defined(__NetBSD__) |
424bdfb1 PD |
35 | #include <readline/readline.h> |
36 | #else | |
37 | #include <editline/readline.h> | |
38 | #endif | |
39 | ||
d2138f20 | 40 | #include "dnsdist-systemd.hh" |
6ab65223 PL |
41 | #ifdef HAVE_SYSTEMD |
42 | #include <systemd/sd-daemon.h> | |
43 | #endif | |
44 | ||
5cc8371b RG |
45 | #include "dnsdist.hh" |
46 | #include "dnsdist-cache.hh" | |
b5521206 | 47 | #include "dnsdist-console.hh" |
5cc8371b | 48 | #include "dnsdist-ecs.hh" |
dd9c8246 | 49 | #include "dnsdist-healthchecks.hh" |
5cc8371b | 50 | #include "dnsdist-lua.hh" |
03b00917 | 51 | #include "dnsdist-rings.hh" |
5d4e1ef8 | 52 | #include "dnsdist-secpoll.hh" |
53c57da7 | 53 | #include "dnsdist-xpf.hh" |
5cc8371b RG |
54 | |
55 | #include "base64.hh" | |
56 | #include "delaypipe.hh" | |
57 | #include "dolog.hh" | |
58 | #include "dnsname.hh" | |
e0fd37ec | 59 | #include "dnsparser.hh" |
5cc8371b RG |
60 | #include "ednsoptions.hh" |
61 | #include "gettime.hh" | |
62 | #include "lock.hh" | |
63 | #include "misc.hh" | |
64 | #include "sodcrypto.hh" | |
65 | #include "sstuff.hh" | |
519f5484 | 66 | #include "threadname.hh" |
5cc8371b | 67 | |
a40df301 | 68 | /* Known sins: |
e48090d1 | 69 | |
d12cea01 | 70 | Receiver is currently single threaded |
e5a14b2b | 71 | not *that* bad actually, but now that we are thread safe, might want to scale |
a40df301 | 72 | */ |
24d5cb00 | 73 | |
0940e4eb | 74 | /* the Rulaction plan |
75 | Set of Rules, if one matches, it leads to an Action | |
76 | Both rules and actions could conceivably be Lua based. | |
77 | On the C++ side, both could be inherited from a class Rule and a class Action, | |
78 | on the Lua side we can't do that. */ | |
79 | ||
64e4ebb4 | 80 | using std::atomic; |
81 | using std::thread; | |
7730131a | 82 | bool g_verbose; |
b065f701 | 83 | |
e48090d1 | 84 | struct DNSDistStats g_stats; |
37a5c2d5 | 85 | |
3b203c83 | 86 | uint16_t g_maxOutstanding{std::numeric_limits<uint16_t>::max()}; |
1ea747c0 | 87 | uint32_t g_staleCacheEntriesTTL{0}; |
bbfaaa6f | 88 | bool g_syslog{true}; |
0dffe9e3 | 89 | bool g_allowEmptyResponse{false}; |
cffde2fd | 90 | |
91 | GlobalStateHolder<NetmaskGroup> g_ACL; | |
c9262563 | 92 | string g_outputBuffer; |
a227f47d | 93 | |
a227f47d | 94 | std::vector<std::shared_ptr<TLSFrontend>> g_tlslocals; |
fbf14b03 | 95 | std::vector<std::shared_ptr<DOHFrontend>> g_dohlocals; |
6e9fd124 | 96 | std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals; |
87b515ed RG |
97 | #ifdef HAVE_EBPF |
98 | shared_ptr<BPFFilter> g_defaultBPFFilter; | |
8429ad04 | 99 | std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters; |
87b515ed | 100 | #endif /* HAVE_EBPF */ |
6e9fd124 | 101 | std::vector<std::unique_ptr<ClientState>> g_frontends; |
886e2cf2 | 102 | GlobalStateHolder<pools_t> g_pools; |
0beaa5c8 | 103 | size_t g_udpVectorSize{1}; |
7c0860e1 | 104 | |
9f4eb5cc RG |
105 | bool g_snmpEnabled{false}; |
106 | bool g_snmpTrapsEnabled{false}; | |
107 | DNSDistSNMPAgent* g_snmpAgent{nullptr}; | |
108 | ||
726ddf60 | 109 | /* UDP: the grand design. Per socket we listen on for incoming queries there is one thread. |
7730131a | 110 | Then we have a bunch of connected sockets for talking to downstream servers. |
111 | We send directly to those sockets. | |
24d5cb00 | 112 | |
7730131a | 113 | For the return path, per downstream server we have a thread that listens to responses. |
24d5cb00 | 114 | |
7730131a | 115 | Per socket there is an array of 2^16 states, when we send out a packet downstream, we note |
116 | there the original requestor and the original id. The new ID is the offset in the array. | |
117 | ||
118 | When an answer comes in on a socket, we look up the offset by the id, and lob it to the | |
119 | original requestor. | |
120 | ||
121 | IDs are assigned by atomic increments of the socket offset. | |
122 | */ | |
123 | ||
4d5959e6 RG |
124 | GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions; |
125 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions; | |
126 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions; | |
2d4783a8 | 127 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions; |
4d5959e6 | 128 | |
df111b53 | 129 | Rings g_rings; |
786e4d8c | 130 | QueryCount g_qcount; |
7c0860e1 | 131 | |
ecbe9133 | 132 | GlobalStateHolder<servers_t> g_dstates; |
78ffa782 | 133 | GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG; |
71c94675 | 134 | GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT; |
dd46e5e3 | 135 | DNSAction::Action g_dynBlockAction = DNSAction::Action::Drop; |
3f6d07a4 RG |
136 | int g_tcpRecvTimeout{2}; |
137 | int g_tcpSendTimeout{2}; | |
e0b5e49d | 138 | int g_udpTimeout{2}; |
3f6d07a4 | 139 | |
26a3cdb7 | 140 | bool g_servFailOnNoPolicy{false}; |
e72fbfc4 | 141 | bool g_truncateTC{false}; |
53c57da7 RG |
142 | bool g_fixupCase{false}; |
143 | bool g_preserveTrailingData{false}; | |
32b86928 | 144 | bool g_roundrobinFailOnNoServer{false}; |
0beaa5c8 | 145 | |
83fe2c55 RG |
146 | std::set<std::string> g_capabilitiesToRetain; |
147 | ||
e0fd37ec | 148 | static void truncateTC(char* packet, uint16_t* len, size_t responseSize, unsigned int consumed) |
6ad8b29a | 149 | try |
150 | { | |
e0fd37ec RG |
151 | bool hadEDNS = false; |
152 | uint16_t payloadSize = 0; | |
153 | uint16_t z = 0; | |
154 | ||
155 | if (g_addEDNSToSelfGeneratedResponses) { | |
156 | hadEDNS = getEDNSUDPPayloadSizeAndZ(packet, *len, &payloadSize, &z); | |
157 | } | |
158 | ||
2aa2c0aa | 159 | *len=static_cast<uint16_t>(sizeof(dnsheader)+consumed+DNS_TYPE_SIZE+DNS_CLASS_SIZE); |
e7c732b8 RG |
160 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet); |
161 | dh->ancount = dh->arcount = dh->nscount = 0; | |
e0fd37ec RG |
162 | |
163 | if (hadEDNS) { | |
5e98ccfa | 164 | addEDNS(dh, *len, responseSize, z & EDNS_HEADER_FLAG_DO, payloadSize, 0); |
e0fd37ec | 165 | } |
6ad8b29a | 166 | } |
167 | catch(...) | |
168 | { | |
169 | g_stats.truncFail++; | |
170 | } | |
171 | ||
7b3865cd | 172 | struct DelayedPacket |
173 | { | |
174 | int fd; | |
175 | string packet; | |
176 | ComboAddress destination; | |
68f3eb4d | 177 | ComboAddress origDest; |
7b3865cd | 178 | void operator()() |
179 | { | |
20b019fa RG |
180 | ssize_t res; |
181 | if(origDest.sin4.sin_family == 0) { | |
182 | res = sendto(fd, packet.c_str(), packet.size(), 0, (struct sockaddr*)&destination, destination.getSocklen()); | |
183 | } | |
184 | else { | |
185 | res = sendfromto(fd, packet.c_str(), packet.size(), 0, origDest, destination); | |
186 | } | |
187 | if (res == -1) { | |
188 | int err = errno; | |
189 | vinfolog("Error sending delayed response to %s: %s", destination.toStringWithPort(), strerror(err)); | |
190 | } | |
7b3865cd | 191 | } |
192 | }; | |
193 | ||
3e425868 | 194 | DelayPipe<DelayedPacket>* g_delay = nullptr; |
7b3865cd | 195 | |
f653b8df | 196 | void doLatencyStats(double udiff) |
daacd477 | 197 | { |
cb167afd CHB |
198 | if(udiff < 1000) ++g_stats.latency0_1; |
199 | else if(udiff < 10000) ++g_stats.latency1_10; | |
200 | else if(udiff < 50000) ++g_stats.latency10_50; | |
201 | else if(udiff < 100000) ++g_stats.latency50_100; | |
202 | else if(udiff < 1000000) ++g_stats.latency100_1000; | |
203 | else ++g_stats.latencySlow; | |
eb0335ff | 204 | g_stats.latencySum += udiff / 1000; |
be6c318f | 205 | |
daacd477 | 206 | auto doAvg = [](double& var, double n, double weight) { |
207 | var = (weight -1) * var/weight + n/weight; | |
208 | }; | |
209 | ||
210 | doAvg(g_stats.latencyAvg100, udiff, 100); | |
211 | doAvg(g_stats.latencyAvg1000, udiff, 1000); | |
212 | doAvg(g_stats.latencyAvg10000, udiff, 10000); | |
213 | doAvg(g_stats.latencyAvg1000000, udiff, 1000000); | |
214 | } | |
ca404e94 | 215 | |
e7c732b8 | 216 | bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed) |
fcffc585 | 217 | { |
fcffc585 RG |
218 | if (responseLen < sizeof(dnsheader)) { |
219 | return false; | |
220 | } | |
221 | ||
3e425868 | 222 | const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(response); |
c8c3d4e4 | 223 | if (dh->qdcount == 0) { |
ad7ec9a2 | 224 | if ((dh->rcode != RCode::NoError && dh->rcode != RCode::NXDomain) || g_allowEmptyResponse) { |
c8c3d4e4 RG |
225 | return true; |
226 | } | |
227 | else { | |
cb167afd | 228 | ++g_stats.nonCompliantResponses; |
c8c3d4e4 RG |
229 | return false; |
230 | } | |
231 | } | |
232 | ||
3e425868 RG |
233 | uint16_t rqtype, rqclass; |
234 | DNSName rqname; | |
fcffc585 RG |
235 | try { |
236 | rqname=DNSName(response, responseLen, sizeof(dnsheader), false, &rqtype, &rqclass, &consumed); | |
237 | } | |
3e425868 RG |
238 | catch(const std::exception& e) { |
239 | if(responseLen > 0 && static_cast<size_t>(responseLen) > sizeof(dnsheader)) { | |
fcffc585 | 240 | infolog("Backend %s sent us a response with id %d that did not parse: %s", remote.toStringWithPort(), ntohs(dh->id), e.what()); |
3e425868 | 241 | } |
cb167afd | 242 | ++g_stats.nonCompliantResponses; |
fcffc585 RG |
243 | return false; |
244 | } | |
245 | ||
246 | if (rqtype != qtype || rqclass != qclass || rqname != qname) { | |
247 | return false; | |
248 | } | |
249 | ||
250 | return true; | |
251 | } | |
252 | ||
4ab01344 | 253 | static void restoreFlags(struct dnsheader* dh, uint16_t origFlags) |
fcffc585 RG |
254 | { |
255 | static const uint16_t rdMask = 1 << FLAGS_RD_OFFSET; | |
256 | static const uint16_t cdMask = 1 << FLAGS_CD_OFFSET; | |
257 | static const uint16_t restoreFlagsMask = UINT16_MAX & ~(rdMask | cdMask); | |
0f72fd5c RG |
258 | uint16_t * flags = getFlagsFromDNSHeader(dh); |
259 | /* clear the flags we are about to restore */ | |
260 | *flags &= restoreFlagsMask; | |
261 | /* only keep the flags we want to restore */ | |
262 | origFlags &= ~restoreFlagsMask; | |
263 | /* set the saved flags as they were */ | |
264 | *flags |= origFlags; | |
265 | } | |
266 | ||
4ab01344 | 267 | static bool fixUpQueryTurnedResponse(DNSQuestion& dq, const uint16_t origFlags) |
e7c732b8 RG |
268 | { |
269 | restoreFlags(dq.dh, origFlags); | |
270 | ||
271 | return addEDNSToQueryTurnedResponse(dq); | |
272 | } | |
273 | ||
3e425868 | 274 | static bool fixUpResponse(char** response, uint16_t* responseLen, size_t* responseSize, const DNSName& qname, uint16_t origFlags, bool ednsAdded, bool ecsAdded, std::vector<uint8_t>& rewrittenResponse, uint16_t addRoom, bool* zeroScope) |
0f72fd5c | 275 | { |
fcffc585 RG |
276 | if (*responseLen < sizeof(dnsheader)) { |
277 | return false; | |
278 | } | |
279 | ||
3e425868 | 280 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(*response); |
c8c3d4e4 RG |
281 | restoreFlags(dh, origFlags); |
282 | ||
283 | if (*responseLen == sizeof(dnsheader)) { | |
284 | return true; | |
285 | } | |
286 | ||
fcffc585 RG |
287 | if(g_fixupCase) { |
288 | string realname = qname.toDNSString(); | |
289 | if (*responseLen >= (sizeof(dnsheader) + realname.length())) { | |
290 | memcpy(*response + sizeof(dnsheader), realname.c_str(), realname.length()); | |
291 | } | |
292 | } | |
293 | ||
ff73f02b | 294 | if (ednsAdded || ecsAdded) { |
ceae225c | 295 | uint16_t optStart; |
fcffc585 RG |
296 | size_t optLen = 0; |
297 | bool last = false; | |
298 | ||
11d5d247 RG |
299 | const std::string responseStr(*response, *responseLen); |
300 | int res = locateEDNSOptRR(responseStr, &optStart, &optLen, &last); | |
fcffc585 RG |
301 | |
302 | if (res == 0) { | |
49c33a6c RG |
303 | if (zeroScope) { // this finds if an EDNS Client Subnet scope was set, and if it is 0 |
304 | size_t optContentStart = 0; | |
305 | uint16_t optContentLen = 0; | |
306 | /* we need at least 4 bytes after the option length (family: 2, source prefix-length: 1, scope prefix-length: 1) */ | |
307 | if (isEDNSOptionInOpt(responseStr, optStart, optLen, EDNSOptionCode::ECS, &optContentStart, &optContentLen) && optContentLen >= 4) { | |
308 | /* see if the EDNS Client Subnet SCOPE PREFIX-LENGTH byte in position 3 is set to 0, which is the only thing | |
309 | we care about. */ | |
310 | *zeroScope = responseStr.at(optContentStart + 3) == 0; | |
311 | } | |
40669042 | 312 | } |
9837850d | 313 | |
ff73f02b RG |
314 | if (ednsAdded) { |
315 | /* we added the entire OPT RR, | |
316 | therefore we need to remove it entirely */ | |
317 | if (last) { | |
318 | /* simply remove the last AR */ | |
319 | *responseLen -= optLen; | |
320 | uint16_t arcount = ntohs(dh->arcount); | |
321 | arcount--; | |
322 | dh->arcount = htons(arcount); | |
323 | } | |
324 | else { | |
325 | /* Removing an intermediary RR could lead to compression error */ | |
11d5d247 | 326 | if (rewriteResponseWithoutEDNS(responseStr, rewrittenResponse) == 0) { |
ff73f02b RG |
327 | *responseLen = rewrittenResponse.size(); |
328 | if (addRoom && (UINT16_MAX - *responseLen) > addRoom) { | |
329 | rewrittenResponse.reserve(*responseLen + addRoom); | |
330 | } | |
331 | *responseSize = rewrittenResponse.capacity(); | |
332 | *response = reinterpret_cast<char*>(rewrittenResponse.data()); | |
333 | } | |
334 | else { | |
335 | warnlog("Error rewriting content"); | |
336 | } | |
337 | } | |
fcffc585 RG |
338 | } |
339 | else { | |
ff73f02b RG |
340 | /* the OPT RR was already present, but without ECS, |
341 | we need to remove the ECS option if any */ | |
342 | if (last) { | |
343 | /* nothing after the OPT RR, we can simply remove the | |
344 | ECS option */ | |
345 | size_t existingOptLen = optLen; | |
11d5d247 | 346 | removeEDNSOptionFromOPT(*response + optStart, &optLen, EDNSOptionCode::ECS); |
ff73f02b | 347 | *responseLen -= (existingOptLen - optLen); |
fcffc585 RG |
348 | } |
349 | else { | |
ff73f02b | 350 | /* Removing an intermediary RR could lead to compression error */ |
11d5d247 | 351 | if (rewriteResponseWithoutEDNSOption(responseStr, EDNSOptionCode::ECS, rewrittenResponse) == 0) { |
ff73f02b RG |
352 | *responseLen = rewrittenResponse.size(); |
353 | if (addRoom && (UINT16_MAX - *responseLen) > addRoom) { | |
354 | rewrittenResponse.reserve(*responseLen + addRoom); | |
355 | } | |
356 | *responseSize = rewrittenResponse.capacity(); | |
357 | *response = reinterpret_cast<char*>(rewrittenResponse.data()); | |
358 | } | |
359 | else { | |
360 | warnlog("Error rewriting content"); | |
361 | } | |
fcffc585 RG |
362 | } |
363 | } | |
364 | } | |
365 | } | |
366 | ||
367 | return true; | |
368 | } | |
369 | ||
fcffc585 | 370 | #ifdef HAVE_DNSCRYPT |
3e425868 | 371 | static bool encryptResponse(char* response, uint16_t* responseLen, size_t responseSize, bool tcp, std::shared_ptr<DNSCryptQuery> dnsCryptQuery, dnsheader** dh, dnsheader* dhCopy) |
fcffc585 | 372 | { |
0f72fd5c RG |
373 | if (dnsCryptQuery) { |
374 | uint16_t encryptedResponseLen = 0; | |
57847d65 RG |
375 | |
376 | /* save the original header before encrypting it in place */ | |
377 | if (dh != nullptr && *dh != nullptr && dhCopy != nullptr) { | |
378 | memcpy(dhCopy, *dh, sizeof(dnsheader)); | |
379 | *dh = dhCopy; | |
380 | } | |
381 | ||
43234e76 | 382 | int res = dnsCryptQuery->encryptResponse(response, *responseLen, responseSize, tcp, &encryptedResponseLen); |
fcffc585 | 383 | if (res == 0) { |
0f72fd5c | 384 | *responseLen = encryptedResponseLen; |
fcffc585 RG |
385 | } else { |
386 | /* dropping response */ | |
387 | vinfolog("Error encrypting the response, dropping."); | |
388 | return false; | |
389 | } | |
390 | } | |
0f72fd5c RG |
391 | return true; |
392 | } | |
7129b5c4 | 393 | #endif /* HAVE_DNSCRYPT */ |
fcffc585 | 394 | |
3e425868 RG |
395 | static bool applyRulesToResponse(LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr) |
396 | { | |
397 | DNSResponseAction::Action action=DNSResponseAction::Action::None; | |
398 | std::string ruleresult; | |
399 | for(const auto& lr : *localRespRulactions) { | |
400 | if(lr.d_rule->matches(&dr)) { | |
401 | lr.d_rule->d_matches++; | |
402 | action=(*lr.d_action)(&dr, &ruleresult); | |
403 | switch(action) { | |
404 | case DNSResponseAction::Action::Allow: | |
405 | return true; | |
406 | break; | |
407 | case DNSResponseAction::Action::Drop: | |
408 | return false; | |
409 | break; | |
410 | case DNSResponseAction::Action::HeaderModify: | |
411 | return true; | |
412 | break; | |
413 | case DNSResponseAction::Action::ServFail: | |
414 | dr.dh->rcode = RCode::ServFail; | |
415 | return true; | |
416 | break; | |
417 | /* non-terminal actions follow */ | |
418 | case DNSResponseAction::Action::Delay: | |
419 | dr.delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry | |
420 | break; | |
421 | case DNSResponseAction::Action::None: | |
422 | break; | |
423 | } | |
424 | } | |
425 | } | |
426 | ||
427 | return true; | |
428 | } | |
429 | ||
430 | bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted) | |
431 | { | |
432 | if (!applyRulesToResponse(localRespRulactions, dr)) { | |
433 | return false; | |
434 | } | |
435 | ||
436 | bool zeroScope = false; | |
437 | if (!fixUpResponse(response, responseLen, responseSize, *dr.qname, dr.origFlags, dr.ednsAdded, dr.ecsAdded, rewrittenResponse, addRoom, dr.useZeroScope ? &zeroScope : nullptr)) { | |
438 | return false; | |
439 | } | |
440 | ||
01e23732 | 441 | if (dr.packetCache && !dr.skipCache && *responseLen <= s_maxPacketCacheEntrySize) { |
3e425868 RG |
442 | if (!dr.useZeroScope) { |
443 | /* if the query was not suitable for zero-scope, for | |
444 | example because it had an existing ECS entry so the hash is | |
445 | not really 'no ECS', so just insert it for the existing subnet | |
446 | since: | |
447 | - we don't have the correct hash for a non-ECS query | |
448 | - inserting with hash computed before the ECS replacement but with | |
449 | the subnet extracted _after_ the replacement would not work. | |
450 | */ | |
451 | zeroScope = false; | |
452 | } | |
453 | // if zeroScope, pass the pre-ECS hash-key and do not pass the subnet to the cache | |
454 | dr.packetCache->insert(zeroScope ? dr.cacheKeyNoECS : dr.cacheKey, zeroScope ? boost::none : dr.subnet, dr.origFlags, dr.dnssecOK, *dr.qname, dr.qtype, dr.qclass, *response, *responseLen, dr.tcp, dr.dh->rcode, dr.tempFailureTTL); | |
455 | } | |
456 | ||
457 | #ifdef HAVE_DNSCRYPT | |
458 | if (!muted) { | |
459 | if (!encryptResponse(*response, responseLen, *responseSize, dr.tcp, dr.dnsCryptQuery, nullptr, nullptr)) { | |
460 | return false; | |
461 | } | |
462 | } | |
7129b5c4 | 463 | #endif /* HAVE_DNSCRYPT */ |
3e425868 RG |
464 | |
465 | return true; | |
466 | } | |
467 | ||
468 | static bool sendUDPResponse(int origFD, const char* response, const uint16_t responseLen, const int delayMsec, const ComboAddress& origDest, const ComboAddress& origRemote) | |
0f72fd5c | 469 | { |
fcffc585 RG |
470 | if(delayMsec && g_delay) { |
471 | DelayedPacket dp{origFD, string(response,responseLen), origRemote, origDest}; | |
472 | g_delay->submit(dp, delayMsec); | |
473 | } | |
474 | else { | |
20b019fa RG |
475 | ssize_t res; |
476 | if(origDest.sin4.sin_family == 0) { | |
3e425868 | 477 | res = sendto(origFD, response, responseLen, 0, reinterpret_cast<const struct sockaddr*>(&origRemote), origRemote.getSocklen()); |
20b019fa RG |
478 | } |
479 | else { | |
480 | res = sendfromto(origFD, response, responseLen, 0, origDest, origRemote); | |
481 | } | |
482 | if (res == -1) { | |
483 | int err = errno; | |
a702a96c | 484 | vinfolog("Error sending response to %s: %s", origRemote.toStringWithPort(), stringerror(err)); |
20b019fa | 485 | } |
fcffc585 RG |
486 | } |
487 | ||
488 | return true; | |
489 | } | |
490 | ||
150105a2 | 491 | |
fbf14b03 | 492 | int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state) |
150105a2 | 493 | { |
5bdbb83d | 494 | return state->sockets[state->socketsOffset++ % state->sockets.size()]; |
150105a2 RG |
495 | } |
496 | ||
5bdbb83d | 497 | static void pickBackendSocketsReadyForReceiving(const std::shared_ptr<DownstreamState>& state, std::vector<int>& ready) |
150105a2 | 498 | { |
5bdbb83d | 499 | ready.clear(); |
150105a2 | 500 | |
5bdbb83d RG |
501 | if (state->sockets.size() == 1) { |
502 | ready.push_back(state->sockets[0]); | |
503 | return ; | |
150105a2 RG |
504 | } |
505 | ||
5bdbb83d RG |
506 | { |
507 | std::lock_guard<std::mutex> lock(state->socketsLock); | |
508 | state->mplexer->getAvailableFDs(ready, -1); | |
150105a2 | 509 | } |
150105a2 RG |
510 | } |
511 | ||
4632045b | 512 | // listens on a dedicated socket, lobs answers from downstream servers to original requestors |
9b73b71c | 513 | void responderThread(std::shared_ptr<DownstreamState> dss) |
66d1c0bf | 514 | try { |
519f5484 | 515 | setThreadName("dnsdist/respond"); |
d8c19b98 | 516 | auto localRespRulactions = g_resprulactions.getLocal(); |
8179b6d6 | 517 | char packet[s_maxPacketCacheEntrySize + DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE]; |
b50ee135 | 518 | static_assert(sizeof(packet) <= UINT16_MAX, "Packet size should fit in a uint16_t"); |
3e425868 RG |
519 | /* when the answer is encrypted in place, we need to get a copy |
520 | of the original header before encryption to fill the ring buffer */ | |
521 | dnsheader cleartextDH; | |
ca404e94 | 522 | vector<uint8_t> rewrittenResponse; |
7267213a | 523 | |
66d1c0bf | 524 | uint16_t queryId = 0; |
5bdbb83d RG |
525 | std::vector<int> sockets; |
526 | sockets.reserve(dss->sockets.size()); | |
527 | ||
7730131a | 528 | for(;;) { |
57847d65 | 529 | dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet); |
66d1c0bf | 530 | try { |
5bdbb83d RG |
531 | pickBackendSocketsReadyForReceiving(dss, sockets); |
532 | for (const auto& fd : sockets) { | |
533 | ssize_t got = recv(fd, packet, sizeof(packet), 0); | |
534 | char * response = packet; | |
535 | size_t responseSize = sizeof(packet); | |
ca404e94 | 536 | |
2aa2c0aa | 537 | if (got < 0 || static_cast<size_t>(got) < sizeof(dnsheader)) |
5bdbb83d | 538 | continue; |
4f380af3 | 539 | |
3e425868 | 540 | uint16_t responseLen = static_cast<uint16_t>(got); |
5bdbb83d | 541 | queryId = dh->id; |
b50ee135 | 542 | |
fbf14b03 | 543 | if(queryId >= dss->idStates.size()) { |
5bdbb83d | 544 | continue; |
fbf14b03 | 545 | } |
4f380af3 | 546 | |
5bdbb83d | 547 | IDState* ids = &dss->idStates[queryId]; |
a9489723 | 548 | int64_t usageIndicator = ids->usageIndicator; |
c9ba8478 | 549 | |
311f19d5 | 550 | if(!IDState::isInUse(usageIndicator)) { |
9bd1a882 | 551 | /* the corresponding state is marked as not in use, meaning that: |
a9489723 | 552 | - it was already cleaned up by another thread and the state is gone ; |
9bd1a882 RG |
553 | - we already got a response for this query and this one is a duplicate. |
554 | Either way, we don't touch it. | |
555 | */ | |
5bdbb83d | 556 | continue; |
9bd1a882 | 557 | } |
7267213a | 558 | |
9bd1a882 | 559 | /* read the potential DOHUnit state as soon as possible, but don't use it |
a9489723 | 560 | until we have confirmed that we own this state by updating usageIndicator */ |
0956c5c5 | 561 | auto du = ids->du; |
5bdbb83d RG |
562 | /* setting age to 0 to prevent the maintainer thread from |
563 | cleaning this IDS while we process the response. | |
5bdbb83d RG |
564 | */ |
565 | ids->age = 0; | |
a9489723 | 566 | int origFD = ids->origFD; |
fcffc585 | 567 | |
e7c732b8 RG |
568 | unsigned int consumed = 0; |
569 | if (!responseContentMatches(response, responseLen, ids->qname, ids->qtype, ids->qclass, dss->remote, consumed)) { | |
5bdbb83d RG |
570 | continue; |
571 | } | |
7267213a | 572 | |
3c72bc54 | 573 | bool isDoH = du != nullptr; |
a9489723 RG |
574 | /* atomically mark the state as available, but only if it has not been altered |
575 | in the meantime */ | |
311f19d5 | 576 | if (ids->tryMarkUnused(usageIndicator)) { |
9bd1a882 RG |
577 | /* clear the potential DOHUnit asap, it's ours now |
578 | and since we just marked the state as unused, | |
579 | someone could overwrite it. */ | |
246ff31f | 580 | ids->du = nullptr; |
71b86bd8 RG |
581 | /* we only decrement the outstanding counter if the value was not |
582 | altered in the meantime, which would mean that the state has been actively reused | |
583 | and the other thread has not incremented the outstanding counter, so we don't | |
584 | want it to be decremented twice. */ | |
585 | --dss->outstanding; // you'd think an attacker could game this, but we're using connected socket | |
0956c5c5 | 586 | } else { |
9bd1a882 | 587 | /* someone updated the state in the meantime, we can't touch the existing pointer */ |
0956c5c5 | 588 | du = nullptr; |
f2d0c808 RG |
589 | /* since the state has been updated, we can't safely access it so let's just drop |
590 | this response */ | |
591 | continue; | |
71b86bd8 | 592 | } |
2c5db3f7 | 593 | |
5bdbb83d | 594 | if(dh->tc && g_truncateTC) { |
e0fd37ec | 595 | truncateTC(response, &responseLen, responseSize, consumed); |
5bdbb83d | 596 | } |
aeb36780 | 597 | |
5bdbb83d | 598 | dh->id = ids->origID; |
ca404e94 | 599 | |
5bdbb83d | 600 | uint16_t addRoom = 0; |
d0ae6360 RG |
601 | DNSResponse dr = makeDNSResponseFromIDState(*ids, dh, sizeof(packet), responseLen, false); |
602 | if (dr.dnsCryptQuery) { | |
5bdbb83d RG |
603 | addRoom = DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE; |
604 | } | |
ca404e94 | 605 | |
3e425868 RG |
606 | memcpy(&cleartextDH, dr.dh, sizeof(cleartextDH)); |
607 | if (!processResponse(&response, &responseLen, &responseSize, localRespRulactions, dr, addRoom, rewrittenResponse, ids->cs && ids->cs->muted)) { | |
608 | continue; | |
5bdbb83d | 609 | } |
886e2cf2 | 610 | |
5bdbb83d | 611 | if (ids->cs && !ids->cs->muted) { |
0956c5c5 | 612 | if (du) { |
fbf14b03 RG |
613 | #ifdef HAVE_DNS_OVER_HTTPS |
614 | // DoH query | |
246ff31f | 615 | du->response = std::string(response, responseLen); |
0956c5c5 | 616 | if (send(du->rsock, &du, sizeof(du), 0) != sizeof(du)) { |
9bd1a882 | 617 | /* at this point we have the only remaining pointer on this |
b6d19fca RG |
618 | DOHUnit object since we did set ids->du to nullptr earlier, |
619 | except if we got the response before the pointer could be | |
620 | released by the frontend */ | |
621 | du->release(); | |
fbf14b03 RG |
622 | } |
623 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
0956c5c5 | 624 | du = nullptr; |
fbf14b03 RG |
625 | } |
626 | else { | |
627 | ComboAddress empty; | |
628 | empty.sin4.sin_family = 0; | |
629 | /* if ids->destHarvested is false, origDest holds the listening address. | |
630 | We don't want to use that as a source since it could be 0.0.0.0 for example. */ | |
631 | sendUDPResponse(origFD, response, responseLen, dr.delayMsec, ids->destHarvested ? ids->origDest : empty, ids->origRemote); | |
632 | } | |
5bdbb83d | 633 | } |
66d1c0bf | 634 | |
cb167afd | 635 | ++g_stats.responses; |
83f7bbdb RG |
636 | if (ids->cs) { |
637 | ++ids->cs->responses; | |
638 | } | |
7fc95193 | 639 | ++dss->responses; |
66d1c0bf | 640 | |
5bdbb83d | 641 | double udiff = ids->sentTime.udiff(); |
fbf14b03 | 642 | vinfolog("Got answer from %s, relayed to %s%s, took %f usec", dss->remote.toStringWithPort(), ids->origRemote.toStringWithPort(), |
3c72bc54 | 643 | isDoH ? " (https)": "", udiff); |
66d1c0bf | 644 | |
01f6920b RG |
645 | struct timespec ts; |
646 | gettime(&ts); | |
3e425868 | 647 | g_rings.insertResponse(ts, *dr.remote, *dr.qname, dr.qtype, static_cast<unsigned int>(udiff), static_cast<unsigned int>(got), cleartextDH, dss->remote); |
66d1c0bf | 648 | |
5d7a33dd | 649 | switch (cleartextDH.rcode) { |
61d10a4d MH |
650 | case RCode::NXDomain: |
651 | ++g_stats.frontendNXDomain; | |
652 | break; | |
653 | case RCode::ServFail: | |
cb167afd | 654 | ++g_stats.servfailResponses; |
61d10a4d MH |
655 | ++g_stats.frontendServFail; |
656 | break; | |
657 | case RCode::NoError: | |
658 | ++g_stats.frontendNoError; | |
659 | break; | |
cb167afd | 660 | } |
5bdbb83d | 661 | dss->latencyUsec = (127.0 * dss->latencyUsec / 128.0) + udiff/128.0; |
66d1c0bf | 662 | |
5bdbb83d | 663 | doLatencyStats(udiff); |
fcadd56e | 664 | |
5bdbb83d RG |
665 | rewrittenResponse.clear(); |
666 | } | |
66d1c0bf | 667 | } |
df560083 | 668 | catch(const std::exception& e){ |
cd7fa253 | 669 | vinfolog("Got an error in UDP responder thread while parsing a response from %s, id %d: %s", dss->remote.toStringWithPort(), queryId, e.what()); |
66d1c0bf | 670 | } |
7730131a | 671 | } |
7730131a | 672 | } |
66d1c0bf RG |
673 | catch(const std::exception& e) |
674 | { | |
675 | errlog("UDP responder thread died because of exception: %s", e.what()); | |
66d1c0bf RG |
676 | } |
677 | catch(const PDNSException& e) | |
678 | { | |
679 | errlog("UDP responder thread died because of PowerDNS exception: %s", e.reason); | |
66d1c0bf RG |
680 | } |
681 | catch(...) | |
682 | { | |
683 | errlog("UDP responder thread died because of an exception: %s", "unknown"); | |
66d1c0bf | 684 | } |
24d5cb00 | 685 | |
5d7e6765 | 686 | bool DownstreamState::reconnect() |
3c115e0f | 687 | { |
5d7e6765 RG |
688 | std::unique_lock<std::mutex> tl(connectLock, std::try_to_lock); |
689 | if (!tl.owns_lock()) { | |
690 | /* we are already reconnecting */ | |
691 | return false; | |
692 | } | |
693 | ||
b58f08e5 | 694 | connected = false; |
5bdbb83d | 695 | for (auto& fd : sockets) { |
150105a2 | 696 | if (fd != -1) { |
5d7e6765 | 697 | if (sockets.size() > 1) { |
5bdbb83d RG |
698 | std::lock_guard<std::mutex> lock(socketsLock); |
699 | mplexer->removeReadFD(fd); | |
700 | } | |
150105a2 RG |
701 | /* shutdown() is needed to wake up recv() in the responderThread */ |
702 | shutdown(fd, SHUT_RDWR); | |
703 | close(fd); | |
704 | fd = -1; | |
f99e3aaf | 705 | } |
150105a2 RG |
706 | if (!IsAnyAddress(remote)) { |
707 | fd = SSocket(remote.sin4.sin_family, SOCK_DGRAM, 0); | |
708 | if (!IsAnyAddress(sourceAddr)) { | |
709 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); | |
70b0d0e2 RG |
710 | if (!sourceItfName.empty()) { |
711 | #ifdef SO_BINDTODEVICE | |
712 | int res = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, sourceItfName.c_str(), sourceItfName.length()); | |
713 | if (res != 0) { | |
714 | infolog("Error setting up the interface on backend socket '%s': %s", remote.toStringWithPort(), stringerror()); | |
715 | } | |
716 | #endif | |
717 | } | |
718 | ||
150105a2 RG |
719 | SBind(fd, sourceAddr); |
720 | } | |
721 | try { | |
722 | SConnect(fd, remote); | |
5d7e6765 | 723 | if (sockets.size() > 1) { |
5bdbb83d RG |
724 | std::lock_guard<std::mutex> lock(socketsLock); |
725 | mplexer->addReadFD(fd, [](int, boost::any) {}); | |
726 | } | |
150105a2 RG |
727 | connected = true; |
728 | } | |
729 | catch(const std::runtime_error& error) { | |
730 | infolog("Error connecting to new server with address %s: %s", remote.toStringWithPort(), error.what()); | |
38069e7e RG |
731 | connected = false; |
732 | break; | |
733 | } | |
7565f4e6 | 734 | } |
38069e7e RG |
735 | } |
736 | ||
737 | /* if at least one (re-)connection failed, close all sockets */ | |
738 | if (!connected) { | |
5bdbb83d | 739 | for (auto& fd : sockets) { |
38069e7e | 740 | if (fd != -1) { |
5d7e6765 RG |
741 | if (sockets.size() > 1) { |
742 | std::lock_guard<std::mutex> lock(socketsLock); | |
743 | mplexer->removeReadFD(fd); | |
744 | } | |
38069e7e RG |
745 | /* shutdown() is needed to wake up recv() in the responderThread */ |
746 | shutdown(fd, SHUT_RDWR); | |
747 | close(fd); | |
748 | fd = -1; | |
150105a2 | 749 | } |
7565f4e6 | 750 | } |
b58f08e5 | 751 | } |
5d7e6765 RG |
752 | |
753 | return connected; | |
b58f08e5 | 754 | } |
f2caf657 CHB |
755 | void DownstreamState::hash() |
756 | { | |
d58e616a | 757 | vinfolog("Computing hashes for id=%s and weight=%d", id, weight); |
f2caf657 | 758 | auto w = weight; |
d58e616a | 759 | WriteLock wl(&d_lock); |
f2caf657 CHB |
760 | hashes.clear(); |
761 | while (w > 0) { | |
762 | std::string uuid = boost::str(boost::format("%s-%d") % id % w); | |
763 | unsigned int wshash = burtleCI((const unsigned char*)uuid.c_str(), uuid.size(), g_hashperturb); | |
764 | hashes.insert(wshash); | |
765 | --w; | |
766 | } | |
767 | } | |
768 | ||
769 | void DownstreamState::setId(const boost::uuids::uuid& newId) | |
770 | { | |
771 | id = newId; | |
d58e616a CHB |
772 | // compute hashes only if already done |
773 | if (!hashes.empty()) { | |
774 | hash(); | |
775 | } | |
f2caf657 CHB |
776 | } |
777 | ||
778 | void DownstreamState::setWeight(int newWeight) | |
779 | { | |
5a2bbe8c CHB |
780 | if (newWeight < 1) { |
781 | errlog("Error setting server's weight: downstream weight value must be greater than 0."); | |
782 | return ; | |
783 | } | |
f2caf657 | 784 | weight = newWeight; |
d58e616a CHB |
785 | if (!hashes.empty()) { |
786 | hash(); | |
787 | } | |
f2caf657 | 788 | } |
b58f08e5 | 789 | |
203b5348 | 790 | DownstreamState::DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf_, const std::string& sourceItfName_, size_t numberOfSockets, bool connect=true): sourceItfName(sourceItfName_), remote(remote_), sourceAddr(sourceAddr_), sourceItf(sourceItf_) |
b58f08e5 | 791 | { |
d58e616a | 792 | pthread_rwlock_init(&d_lock, nullptr); |
d61aa945 | 793 | id = getUniqueID(); |
5d7e6765 RG |
794 | threadStarted.clear(); |
795 | ||
5bdbb83d RG |
796 | mplexer = std::unique_ptr<FDMultiplexer>(FDMultiplexer::getMultiplexerSilent()); |
797 | ||
798 | sockets.resize(numberOfSockets); | |
799 | for (auto& fd : sockets) { | |
150105a2 RG |
800 | fd = -1; |
801 | } | |
802 | ||
203b5348 | 803 | if (connect && !IsAnyAddress(remote)) { |
b58f08e5 | 804 | reconnect(); |
f99e3aaf RG |
805 | idStates.resize(g_maxOutstanding); |
806 | sw.start(); | |
807 | infolog("Added downstream server %s", remote.toStringWithPort()); | |
fbe2a2e0 | 808 | } |
1720247e | 809 | |
3c115e0f | 810 | } |
811 | ||
773470ca | 812 | std::mutex g_luamutex; |
3f25bce6 | 813 | LuaContext g_lua; |
3f25bce6 | 814 | |
ecbe9133 | 815 | GlobalStateHolder<ServerPolicy> g_policy; |
773470ca | 816 | |
497a6e3a | 817 | shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq) |
773470ca | 818 | { |
22b2b326 | 819 | for(auto& d : servers) { |
da4e7813 | 820 | if(d.second->isUp() && d.second->qps.check()) |
821 | return d.second; | |
773470ca | 822 | } |
497a6e3a | 823 | return leastOutstanding(servers, dq); |
2c5db3f7 | 824 | } |
825 | ||
d1fba58e | 826 | // get server with least outstanding queries, and within those, with the lowest order, and within those: the fastest |
497a6e3a | 827 | shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq) |
c9262563 | 828 | { |
886e2cf2 RG |
829 | if (servers.size() == 1 && servers[0].second->isUp()) { |
830 | return servers[0].second; | |
831 | } | |
832 | ||
d1fba58e | 833 | vector<pair<tuple<int,int,double>, shared_ptr<DownstreamState>>> poss; |
834 | /* so you might wonder, why do we go through this trouble? The data on which we sort could change during the sort, | |
835 | which would suck royally and could even lead to crashes. So first we snapshot on what we sort, and then we sort */ | |
478ce172 | 836 | poss.reserve(servers.size()); |
0e41337b | 837 | for(auto& d : servers) { |
da4e7813 | 838 | if(d.second->isUp()) { |
d1fba58e | 839 | poss.push_back({make_tuple(d.second->outstanding.load(), d.second->order, d.second->latencyUsec), d.second}); |
c9262563 | 840 | } |
841 | } | |
842 | if(poss.empty()) | |
843 | return shared_ptr<DownstreamState>(); | |
844 | nth_element(poss.begin(), poss.begin(), poss.end(), [](const decltype(poss)::value_type& a, const decltype(poss)::value_type& b) { return a.first < b.first; }); | |
845 | return poss.begin()->second; | |
846 | } | |
847 | ||
497a6e3a | 848 | shared_ptr<DownstreamState> valrandom(unsigned int val, const NumberedServerVector& servers, const DNSQuestion* dq) |
c9262563 | 849 | { |
850 | vector<pair<int, shared_ptr<DownstreamState>>> poss; | |
278403d3 DM |
851 | int sum = 0; |
852 | int max = std::numeric_limits<int>::max(); | |
853 | ||
22b2b326 | 854 | for(auto& d : servers) { // w=1, w=10 -> 1, 11 |
da4e7813 | 855 | if(d.second->isUp()) { |
278403d3 DM |
856 | // Don't overflow sum when adding high weights |
857 | if(d.second->weight > max - sum) { | |
858 | sum = max; | |
859 | } else { | |
860 | sum += d.second->weight; | |
861 | } | |
862 | ||
da4e7813 | 863 | poss.push_back({sum, d.second}); |
c9262563 | 864 | } |
865 | } | |
d2894425 NJ |
866 | |
867 | // Catch poss & sum are empty to avoid SIGFPE | |
868 | if(poss.empty()) | |
869 | return shared_ptr<DownstreamState>(); | |
870 | ||
a7f3108c | 871 | int r = val % sum; |
af619119 | 872 | auto p = upper_bound(poss.begin(), poss.end(),r, [](int r_, const decltype(poss)::value_type& a) { return r_ < a.first;}); |
c9262563 | 873 | if(p==poss.end()) |
874 | return shared_ptr<DownstreamState>(); | |
875 | return p->second; | |
876 | } | |
877 | ||
497a6e3a | 878 | shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq) |
a7f3108c | 879 | { |
497a6e3a | 880 | return valrandom(random(), servers, dq); |
a7f3108c | 881 | } |
882 | ||
36e763fa | 883 | uint32_t g_hashperturb; |
2b4287d4 | 884 | double g_consistentHashBalancingFactor = 0; |
497a6e3a | 885 | shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq) |
a7f3108c | 886 | { |
497a6e3a | 887 | return valrandom(dq->qname->hash(g_hashperturb), servers, dq); |
a7f3108c | 888 | } |
889 | ||
1720247e CHB |
890 | shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq) |
891 | { | |
1720247e | 892 | unsigned int qhash = dq->qname->hash(g_hashperturb); |
b712c51e CHB |
893 | unsigned int sel = std::numeric_limits<unsigned int>::max(); |
894 | unsigned int min = std::numeric_limits<unsigned int>::max(); | |
895 | shared_ptr<DownstreamState> ret = nullptr, first = nullptr; | |
1720247e | 896 | |
2b4287d4 | 897 | double targetLoad = std::numeric_limits<double>::max(); |
80ee5144 | 898 | if (g_consistentHashBalancingFactor > 0) { |
2b4287d4 RG |
899 | /* we start with one, representing the query we are currently handling */ |
900 | double currentLoad = 1; | |
901 | for (const auto& pair : servers) { | |
902 | currentLoad += pair.second->outstanding; | |
903 | } | |
904 | targetLoad = (currentLoad / servers.size()) * g_consistentHashBalancingFactor; | |
905 | } | |
906 | ||
1720247e | 907 | for (const auto& d: servers) { |
2b4287d4 | 908 | if (d.second->isUp() && d.second->outstanding <= targetLoad) { |
93ca495f | 909 | // make sure hashes have been computed |
d58e616a CHB |
910 | if (d.second->hashes.empty()) { |
911 | d.second->hash(); | |
912 | } | |
913 | { | |
914 | ReadLock rl(&(d.second->d_lock)); | |
93ca495f CHB |
915 | const auto& server = d.second; |
916 | // we want to keep track of the last hash | |
b712c51e CHB |
917 | if (min > *(server->hashes.begin())) { |
918 | min = *(server->hashes.begin()); | |
919 | first = server; | |
93ca495f | 920 | } |
b712c51e CHB |
921 | |
922 | auto hash_it = server->hashes.lower_bound(qhash); | |
923 | if (hash_it != server->hashes.end()) { | |
924 | if (*hash_it < sel) { | |
93ca495f CHB |
925 | sel = *hash_it; |
926 | ret = server; | |
927 | } | |
d58e616a | 928 | } |
1720247e CHB |
929 | } |
930 | } | |
931 | } | |
93ca495f CHB |
932 | if (ret != nullptr) { |
933 | return ret; | |
1720247e | 934 | } |
b712c51e CHB |
935 | if (first != nullptr) { |
936 | return first; | |
1720247e | 937 | } |
93ca495f | 938 | return shared_ptr<DownstreamState>(); |
1720247e | 939 | } |
a7f3108c | 940 | |
497a6e3a | 941 | shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq) |
5f504638 | 942 | { |
da4e7813 | 943 | NumberedServerVector poss; |
c9262563 | 944 | |
22b2b326 | 945 | for(auto& d : servers) { |
da4e7813 | 946 | if(d.second->isUp()) { |
5f504638 | 947 | poss.push_back(d); |
c9262563 | 948 | } |
5f504638 | 949 | } |
c9262563 | 950 | |
ecbe9133 | 951 | const auto *res=&poss; |
32b86928 | 952 | if(poss.empty() && !g_roundrobinFailOnNoServer) |
ecbe9133 | 953 | res = &servers; |
c9262563 | 954 | |
955 | if(res->empty()) | |
956 | return shared_ptr<DownstreamState>(); | |
957 | ||
958 | static unsigned int counter; | |
959 | ||
da4e7813 | 960 | return (*res)[(counter++) % res->size()].second; |
5f504638 | 961 | } |
962 | ||
e6841c9e | 963 | ComboAddress g_serverControl{"127.0.0.1:5199"}; |
b076b34a | 964 | |
886e2cf2 RG |
965 | std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName) |
966 | { | |
967 | std::shared_ptr<ServerPool> pool; | |
968 | pools_t::iterator it = pools.find(poolName); | |
969 | if (it != pools.end()) { | |
970 | pool = it->second; | |
971 | } | |
972 | else { | |
8f4f5186 RG |
973 | if (!poolName.empty()) |
974 | vinfolog("Creating pool %s", poolName); | |
886e2cf2 RG |
975 | pool = std::make_shared<ServerPool>(); |
976 | pools.insert(std::pair<std::string,std::shared_ptr<ServerPool> >(poolName, pool)); | |
977 | } | |
978 | return pool; | |
979 | } | |
78c8047b | 980 | |
742c079a RG |
981 | void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy) |
982 | { | |
983 | std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName); | |
984 | if (!poolName.empty()) { | |
985 | vinfolog("Setting pool %s server selection policy to %s", poolName, policy->name); | |
986 | } else { | |
987 | vinfolog("Setting default pool server selection policy to %s", policy->name); | |
988 | } | |
989 | pool->policy = policy; | |
990 | } | |
991 | ||
886e2cf2 | 992 | void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server) |
22b2b326 | 993 | { |
886e2cf2 | 994 | std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName); |
c218b223 | 995 | if (!poolName.empty()) { |
8f4f5186 | 996 | vinfolog("Adding server to pool %s", poolName); |
c218b223 | 997 | } else { |
8f4f5186 | 998 | vinfolog("Adding server to default pool"); |
c218b223 | 999 | } |
a1b1a29d | 1000 | pool->addServer(server); |
886e2cf2 RG |
1001 | } |
1002 | ||
1003 | void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server) | |
1004 | { | |
8f4f5186 | 1005 | std::shared_ptr<ServerPool> pool = getPool(pools, poolName); |
886e2cf2 | 1006 | |
c218b223 | 1007 | if (!poolName.empty()) { |
8f4f5186 | 1008 | vinfolog("Removing server from pool %s", poolName); |
c218b223 | 1009 | } |
1010 | else { | |
8f4f5186 | 1011 | vinfolog("Removing server from default pool"); |
c218b223 | 1012 | } |
886e2cf2 | 1013 | |
a1b1a29d | 1014 | pool->removeServer(server); |
886e2cf2 RG |
1015 | } |
1016 | ||
1017 | std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName) | |
1018 | { | |
1019 | pools_t::const_iterator it = pools.find(poolName); | |
1020 | ||
1021 | if (it == pools.end()) { | |
1022 | throw std::out_of_range("No pool named " + poolName); | |
1023 | } | |
1024 | ||
1025 | return it->second; | |
1026 | } | |
1027 | ||
a1b1a29d | 1028 | NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName) |
886e2cf2 RG |
1029 | { |
1030 | std::shared_ptr<ServerPool> pool = getPool(pools, poolName); | |
a1b1a29d | 1031 | return pool->getServers(); |
22b2b326 | 1032 | } |
c9262563 | 1033 | |
202c4ab9 | 1034 | static void spoofResponseFromString(DNSQuestion& dq, const string& spoofContent, bool raw) |
7791f83a RG |
1035 | { |
1036 | string result; | |
614239d0 | 1037 | |
202c4ab9 RG |
1038 | if (raw) { |
1039 | SpoofAction sa(spoofContent); | |
1040 | sa(&dq, &result); | |
1041 | } | |
1042 | else { | |
1043 | std::vector<std::string> addrs; | |
1044 | stringtok(addrs, spoofContent, " ,"); | |
614239d0 | 1045 | |
202c4ab9 | 1046 | if (addrs.size() == 1) { |
614239d0 | 1047 | try { |
202c4ab9 RG |
1048 | ComboAddress spoofAddr(spoofContent); |
1049 | SpoofAction sa({spoofAddr}); | |
1050 | sa(&dq, &result); | |
614239d0 | 1051 | } |
202c4ab9 RG |
1052 | catch(const PDNSException &e) { |
1053 | DNSName cname(spoofContent); | |
1054 | SpoofAction sa(cname); // CNAME then | |
1055 | sa(&dq, &result); | |
614239d0 | 1056 | } |
202c4ab9 RG |
1057 | } else { |
1058 | std::vector<ComboAddress> cas; | |
1059 | for (const auto& addr : addrs) { | |
1060 | try { | |
1061 | cas.push_back(ComboAddress(addr)); | |
1062 | } | |
1063 | catch (...) { | |
1064 | } | |
1065 | } | |
1066 | SpoofAction sa(cas); | |
1067 | sa(&dq, &result); | |
614239d0 | 1068 | } |
7791f83a RG |
1069 | } |
1070 | } | |
1071 | ||
2a28db86 RG |
1072 | bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop) |
1073 | { | |
1074 | switch(action) { | |
1075 | case DNSAction::Action::Allow: | |
1076 | return true; | |
1077 | break; | |
1078 | case DNSAction::Action::Drop: | |
1079 | ++g_stats.ruleDrop; | |
1080 | drop = true; | |
1081 | return true; | |
1082 | break; | |
1083 | case DNSAction::Action::Nxdomain: | |
1084 | dq.dh->rcode = RCode::NXDomain; | |
1085 | dq.dh->qr=true; | |
1086 | ++g_stats.ruleNXDomain; | |
1087 | return true; | |
1088 | break; | |
1089 | case DNSAction::Action::Refused: | |
1090 | dq.dh->rcode = RCode::Refused; | |
1091 | dq.dh->qr=true; | |
1092 | ++g_stats.ruleRefused; | |
1093 | return true; | |
1094 | break; | |
1095 | case DNSAction::Action::ServFail: | |
1096 | dq.dh->rcode = RCode::ServFail; | |
1097 | dq.dh->qr=true; | |
1098 | ++g_stats.ruleServFail; | |
1099 | return true; | |
1100 | break; | |
1101 | case DNSAction::Action::Spoof: | |
202c4ab9 RG |
1102 | spoofResponseFromString(dq, ruleresult, false); |
1103 | return true; | |
1104 | break; | |
1105 | case DNSAction::Action::SpoofRaw: | |
1106 | spoofResponseFromString(dq, ruleresult, true); | |
2a28db86 RG |
1107 | return true; |
1108 | break; | |
1109 | case DNSAction::Action::Truncate: | |
1110 | dq.dh->tc = true; | |
1111 | dq.dh->qr = true; | |
955b9377 RG |
1112 | dq.dh->ra = dq.dh->rd; |
1113 | dq.dh->aa = false; | |
1114 | dq.dh->ad = false; | |
2a28db86 RG |
1115 | return true; |
1116 | break; | |
1117 | case DNSAction::Action::HeaderModify: | |
1118 | return true; | |
1119 | break; | |
1120 | case DNSAction::Action::Pool: | |
1121 | dq.poolname=ruleresult; | |
1122 | return true; | |
1123 | break; | |
1124 | case DNSAction::Action::NoRecurse: | |
1125 | dq.dh->rd = false; | |
1126 | return true; | |
1127 | break; | |
1128 | /* non-terminal actions follow */ | |
1129 | case DNSAction::Action::Delay: | |
1130 | dq.delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry | |
1131 | break; | |
1132 | case DNSAction::Action::None: | |
1133 | /* fall-through */ | |
1134 | case DNSAction::Action::NoOp: | |
1135 | break; | |
1136 | } | |
1137 | ||
1138 | /* false means that we don't stop the processing */ | |
1139 | return false; | |
1140 | } | |
1141 | ||
1142 | ||
1143 | static bool applyRulesToQuery(LocalHolders& holders, DNSQuestion& dq, const struct timespec& now) | |
e91084ce | 1144 | { |
4ab01344 | 1145 | g_rings.insertQuery(now, *dq.remote, *dq.qname, dq.qtype, dq.len, *dq.dh); |
e91084ce | 1146 | |
786e4d8c | 1147 | if(g_qcount.enabled) { |
348ef1c6 | 1148 | string qname = (*dq.qname).toLogString(); |
786e4d8c RS |
1149 | bool countQuery{true}; |
1150 | if(g_qcount.filter) { | |
1151 | std::lock_guard<std::mutex> lock(g_luamutex); | |
dd1a3034 | 1152 | std::tie (countQuery, qname) = g_qcount.filter(&dq); |
786e4d8c RS |
1153 | } |
1154 | ||
1155 | if(countQuery) { | |
1156 | WriteLock wl(&g_qcount.queryLock); | |
1157 | if(!g_qcount.records.count(qname)) { | |
1158 | g_qcount.records[qname] = 0; | |
1159 | } | |
1160 | g_qcount.records[qname]++; | |
1161 | } | |
1162 | } | |
1163 | ||
0beaa5c8 | 1164 | if(auto got = holders.dynNMGBlock->lookup(*dq.remote)) { |
701f690b | 1165 | auto updateBlockStats = [&got]() { |
cb167afd | 1166 | ++g_stats.dynBlocked; |
701f690b | 1167 | got->second.blocks++; |
1168 | }; | |
1169 | ||
e91084ce | 1170 | if(now < got->second.until) { |
7b925432 RG |
1171 | DNSAction::Action action = got->second.action; |
1172 | if (action == DNSAction::Action::None) { | |
1173 | action = g_dynBlockAction; | |
1174 | } | |
477c86a0 RG |
1175 | switch (action) { |
1176 | case DNSAction::Action::NoOp: | |
1177 | /* do nothing */ | |
1178 | break; | |
79ee8ff9 RG |
1179 | |
1180 | case DNSAction::Action::Nxdomain: | |
1181 | vinfolog("Query from %s turned into NXDomain because of dynamic block", dq.remote->toStringWithPort()); | |
1182 | updateBlockStats(); | |
1183 | ||
1184 | dq.dh->rcode = RCode::NXDomain; | |
1185 | dq.dh->qr=true; | |
1186 | return true; | |
1187 | ||
477c86a0 | 1188 | case DNSAction::Action::Refused: |
dd46e5e3 | 1189 | vinfolog("Query from %s refused because of dynamic block", dq.remote->toStringWithPort()); |
701f690b | 1190 | updateBlockStats(); |
8477236d | 1191 | |
dd46e5e3 | 1192 | dq.dh->rcode = RCode::Refused; |
79ee8ff9 | 1193 | dq.dh->qr = true; |
dd46e5e3 | 1194 | return true; |
477c86a0 RG |
1195 | |
1196 | case DNSAction::Action::Truncate: | |
8477236d | 1197 | if(!dq.tcp) { |
701f690b | 1198 | updateBlockStats(); |
8477236d | 1199 | vinfolog("Query from %s truncated because of dynamic block", dq.remote->toStringWithPort()); |
1200 | dq.dh->tc = true; | |
1201 | dq.dh->qr = true; | |
955b9377 RG |
1202 | dq.dh->ra = dq.dh->rd; |
1203 | dq.dh->aa = false; | |
1204 | dq.dh->ad = false; | |
8477236d | 1205 | return true; |
1206 | } | |
1207 | else { | |
348ef1c6 | 1208 | vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1209 | } |
477c86a0 | 1210 | break; |
3d60b39a | 1211 | case DNSAction::Action::NoRecurse: |
1212 | updateBlockStats(); | |
1213 | vinfolog("Query from %s setting rd=0 because of dynamic block", dq.remote->toStringWithPort()); | |
1214 | dq.dh->rd = false; | |
1215 | return true; | |
477c86a0 | 1216 | default: |
701f690b | 1217 | updateBlockStats(); |
dd46e5e3 RG |
1218 | vinfolog("Query from %s dropped because of dynamic block", dq.remote->toStringWithPort()); |
1219 | return false; | |
1220 | } | |
e91084ce RG |
1221 | } |
1222 | } | |
1223 | ||
0beaa5c8 | 1224 | if(auto got = holders.dynSMTBlock->lookup(*dq.qname)) { |
701f690b | 1225 | auto updateBlockStats = [&got]() { |
cb167afd | 1226 | ++g_stats.dynBlocked; |
701f690b | 1227 | got->blocks++; |
1228 | }; | |
1229 | ||
71c94675 | 1230 | if(now < got->until) { |
7b925432 RG |
1231 | DNSAction::Action action = got->action; |
1232 | if (action == DNSAction::Action::None) { | |
1233 | action = g_dynBlockAction; | |
1234 | } | |
477c86a0 RG |
1235 | switch (action) { |
1236 | case DNSAction::Action::NoOp: | |
1237 | /* do nothing */ | |
1238 | break; | |
79ee8ff9 | 1239 | case DNSAction::Action::Nxdomain: |
348ef1c6 | 1240 | vinfolog("Query from %s for %s turned into NXDomain because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
79ee8ff9 RG |
1241 | updateBlockStats(); |
1242 | ||
1243 | dq.dh->rcode = RCode::NXDomain; | |
1244 | dq.dh->qr=true; | |
1245 | return true; | |
477c86a0 | 1246 | case DNSAction::Action::Refused: |
348ef1c6 | 1247 | vinfolog("Query from %s for %s refused because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
701f690b | 1248 | updateBlockStats(); |
8477236d | 1249 | |
dd46e5e3 RG |
1250 | dq.dh->rcode = RCode::Refused; |
1251 | dq.dh->qr=true; | |
1252 | return true; | |
477c86a0 | 1253 | case DNSAction::Action::Truncate: |
8477236d | 1254 | if(!dq.tcp) { |
701f690b | 1255 | updateBlockStats(); |
8477236d | 1256 | |
348ef1c6 | 1257 | vinfolog("Query from %s for %s truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1258 | dq.dh->tc = true; |
1259 | dq.dh->qr = true; | |
955b9377 RG |
1260 | dq.dh->ra = dq.dh->rd; |
1261 | dq.dh->aa = false; | |
1262 | dq.dh->ad = false; | |
8477236d | 1263 | return true; |
1264 | } | |
1265 | else { | |
348ef1c6 | 1266 | vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1267 | } |
477c86a0 | 1268 | break; |
3d60b39a | 1269 | case DNSAction::Action::NoRecurse: |
1270 | updateBlockStats(); | |
1271 | vinfolog("Query from %s setting rd=0 because of dynamic block", dq.remote->toStringWithPort()); | |
1272 | dq.dh->rd = false; | |
1273 | return true; | |
477c86a0 | 1274 | default: |
701f690b | 1275 | updateBlockStats(); |
348ef1c6 | 1276 | vinfolog("Query from %s for %s dropped because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
dd46e5e3 RG |
1277 | return false; |
1278 | } | |
71c94675 | 1279 | } |
1280 | } | |
1281 | ||
e91084ce RG |
1282 | DNSAction::Action action=DNSAction::Action::None; |
1283 | string ruleresult; | |
2a28db86 | 1284 | bool drop = false; |
0beaa5c8 | 1285 | for(const auto& lr : *holders.rulactions) { |
4d5959e6 RG |
1286 | if(lr.d_rule->matches(&dq)) { |
1287 | lr.d_rule->d_matches++; | |
1288 | action=(*lr.d_action)(&dq, &ruleresult); | |
2a28db86 | 1289 | if (processRulesResult(action, dq, ruleresult, drop)) { |
3d60b39a | 1290 | break; |
e91084ce RG |
1291 | } |
1292 | } | |
1293 | } | |
1294 | ||
2a28db86 RG |
1295 | if (drop) { |
1296 | return false; | |
1297 | } | |
1298 | ||
e91084ce RG |
1299 | return true; |
1300 | } | |
1301 | ||
fbf14b03 | 1302 | ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck) |
fbe2a2e0 | 1303 | { |
b58f08e5 RG |
1304 | ssize_t result; |
1305 | ||
fbe2a2e0 | 1306 | if (ss->sourceItf == 0) { |
b58f08e5 RG |
1307 | result = send(sd, request, requestLen, 0); |
1308 | } | |
1309 | else { | |
1310 | struct msghdr msgh; | |
1311 | struct iovec iov; | |
7bec330a | 1312 | cmsgbuf_aligned cbuf; |
a2353842 | 1313 | ComboAddress remote(ss->remote); |
7bec330a OM |
1314 | fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), const_cast<char*>(request), requestLen, &remote); |
1315 | addCMsgSrcAddr(&msgh, &cbuf, &ss->sourceAddr, ss->sourceItf); | |
b58f08e5 | 1316 | result = sendmsg(sd, &msgh, 0); |
fbe2a2e0 RG |
1317 | } |
1318 | ||
b58f08e5 RG |
1319 | if (result == -1) { |
1320 | int savederrno = errno; | |
1321 | vinfolog("Error sending request to backend %s: %d", ss->remote.toStringWithPort(), savederrno); | |
1322 | ||
1323 | /* This might sound silly, but on Linux send() might fail with EINVAL | |
1b126225 RG |
1324 | if the interface the socket was bound to doesn't exist anymore. |
1325 | We don't want to reconnect the real socket if the healthcheck failed, | |
1326 | because it's not using the same socket. | |
1327 | */ | |
1328 | if (!healthCheck && (savederrno == EINVAL || savederrno == ENODEV)) { | |
b58f08e5 RG |
1329 | ss->reconnect(); |
1330 | } | |
fbe2a2e0 RG |
1331 | } |
1332 | ||
b58f08e5 | 1333 | return result; |
fbe2a2e0 RG |
1334 | } |
1335 | ||
0beaa5c8 | 1336 | static bool isUDPQueryAcceptable(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest) |
24d5cb00 | 1337 | { |
0beaa5c8 RG |
1338 | if (msgh->msg_flags & MSG_TRUNC) { |
1339 | /* message was too large for our buffer */ | |
1340 | vinfolog("Dropping message too large for our buffer"); | |
cb167afd | 1341 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1342 | return false; |
1343 | } | |
a4652d55 | 1344 | |
0beaa5c8 RG |
1345 | if(!holders.acl->match(remote)) { |
1346 | vinfolog("Query from %s dropped because of ACL", remote.toStringWithPort()); | |
cb167afd | 1347 | ++g_stats.aclDrops; |
0beaa5c8 | 1348 | return false; |
2b3eefc3 | 1349 | } |
2b3eefc3 | 1350 | |
0beaa5c8 | 1351 | cs.queries++; |
cb167afd | 1352 | ++g_stats.queries; |
2b3eefc3 | 1353 | |
0beaa5c8 RG |
1354 | if (HarvestDestinationAddress(msgh, &dest)) { |
1355 | /* we don't get the port, only the address */ | |
1356 | dest.sin4.sin_port = cs.local.sin4.sin_port; | |
1357 | } | |
1358 | else { | |
1359 | dest.sin4.sin_family = 0; | |
2b3eefc3 | 1360 | } |
549d63c9 | 1361 | |
0beaa5c8 RG |
1362 | return true; |
1363 | } | |
1364 | ||
4ab01344 | 1365 | boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp) |
0beaa5c8 RG |
1366 | { |
1367 | if (cs.dnscryptCtx) { | |
7129b5c4 | 1368 | #ifdef HAVE_DNSCRYPT |
0beaa5c8 RG |
1369 | vector<uint8_t> response; |
1370 | uint16_t decryptedQueryLen = 0; | |
2b3eefc3 | 1371 | |
43234e76 | 1372 | dnsCryptQuery = std::make_shared<DNSCryptQuery>(cs.dnscryptCtx); |
0beaa5c8 | 1373 | |
4ab01344 | 1374 | bool decrypted = handleDNSCryptQuery(const_cast<char*>(query), len, dnsCryptQuery, &decryptedQueryLen, tcp, now, response); |
0beaa5c8 RG |
1375 | |
1376 | if (!decrypted) { | |
1377 | if (response.size() > 0) { | |
4ab01344 | 1378 | return response; |
2b3eefc3 | 1379 | } |
4ab01344 | 1380 | throw std::runtime_error("Unable to decrypt DNSCrypt query, dropping."); |
0beaa5c8 | 1381 | } |
2b3eefc3 | 1382 | |
0beaa5c8 | 1383 | len = decryptedQueryLen; |
7129b5c4 | 1384 | #endif /* HAVE_DNSCRYPT */ |
0beaa5c8 | 1385 | } |
4ab01344 | 1386 | return boost::none; |
0beaa5c8 | 1387 | } |
2b3eefc3 | 1388 | |
0beaa5c8 RG |
1389 | bool checkQueryHeaders(const struct dnsheader* dh) |
1390 | { | |
1391 | if (dh->qr) { // don't respond to responses | |
cb167afd | 1392 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1393 | return false; |
1394 | } | |
2b3eefc3 | 1395 | |
0beaa5c8 | 1396 | if (dh->qdcount == 0) { |
cb167afd | 1397 | ++g_stats.emptyQueries; |
0beaa5c8 RG |
1398 | return false; |
1399 | } | |
0ba5eecf | 1400 | |
0beaa5c8 | 1401 | if (dh->rd) { |
cb167afd | 1402 | ++g_stats.rdQueries; |
0beaa5c8 | 1403 | } |
e91084ce | 1404 | |
0beaa5c8 RG |
1405 | return true; |
1406 | } | |
963bef8d | 1407 | |
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* Fill 'outMsg' so that 'response' is sent to 'remote'. When 'dest' holds
   an address (family != 0) it is attached as the source address through
   'cbuf', otherwise no control data is set. */
static void queueResponse(const ClientState& cs, const char* response, uint16_t responseLen, const ComboAddress& dest, const ComboAddress& remote, struct mmsghdr& outMsg, struct iovec* iov, cmsgbuf_aligned* cbuf)
{
  outMsg.msg_len = 0;
  fillMSGHdr(&outMsg.msg_hdr, iov, nullptr, 0, const_cast<char*>(response), responseLen, const_cast<ComboAddress*>(&remote));

  const bool haveSource = (dest.sin4.sin_family != 0);
  if (haveSource) {
    addCMsgSrcAddr(&outMsg.msg_hdr, cbuf, &dest, 0);
  }
  else {
    outMsg.msg_hdr.msg_control = nullptr;
  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
43eeadc1 | 1422 | |
3e425868 RG |
1423 | /* self-generated responses or cache hits */ |
1424 | static bool prepareOutgoingResponse(LocalHolders& holders, ClientState& cs, DNSQuestion& dq, bool cacheHit) | |
54aaa82b | 1425 | { |
3e425868 | 1426 | DNSResponse dr(dq.qname, dq.qtype, dq.qclass, dq.consumed, dq.local, dq.remote, reinterpret_cast<dnsheader*>(dq.dh), dq.size, dq.len, dq.tcp, dq.queryTime); |
4ab01344 | 1427 | |
54aaa82b | 1428 | #ifdef HAVE_PROTOBUF |
1429 | dr.uniqueId = dq.uniqueId; | |
1430 | #endif | |
1431 | dr.qTag = dq.qTag; | |
3e425868 | 1432 | dr.delayMsec = dq.delayMsec; |
54aaa82b | 1433 | |
3e425868 RG |
1434 | if (!applyRulesToResponse(cacheHit ? holders.cacheHitRespRulactions : holders.selfAnsweredRespRulactions, dr)) { |
1435 | return false; | |
54aaa82b | 1436 | } |
1437 | ||
3e425868 RG |
1438 | /* in case a rule changed it */ |
1439 | dq.delayMsec = dr.delayMsec; | |
1440 | ||
54aaa82b | 1441 | #ifdef HAVE_DNSCRYPT |
7129b5c4 | 1442 | if (!cs.muted) { |
3e425868 RG |
1443 | if (!encryptResponse(reinterpret_cast<char*>(dq.dh), &dq.len, dq.size, dq.tcp, dq.dnsCryptQuery, nullptr, nullptr)) { |
1444 | return false; | |
54aaa82b | 1445 | } |
54aaa82b | 1446 | } |
7129b5c4 | 1447 | #endif /* HAVE_DNSCRYPT */ |
54aaa82b | 1448 | |
389d903a RG |
1449 | if (cacheHit) { |
1450 | ++g_stats.cacheHits; | |
1451 | } | |
3e425868 | 1452 | |
61d10a4d MH |
1453 | switch (dr.dh->rcode) { |
1454 | case RCode::NXDomain: | |
1455 | ++g_stats.frontendNXDomain; | |
1456 | break; | |
1457 | case RCode::ServFail: | |
1458 | ++g_stats.frontendServFail; | |
1459 | break; | |
1460 | case RCode::NoError: | |
1461 | ++g_stats.frontendNoError; | |
1462 | break; | |
1463 | } | |
3e425868 | 1464 | |
54aaa82b | 1465 | doLatencyStats(0); // we're not going to measure this |
3e425868 | 1466 | return true; |
54aaa82b | 1467 | } |
1468 | ||
3e425868 | 1469 | ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend) |
0beaa5c8 | 1470 | { |
4ab01344 | 1471 | const uint16_t queryId = ntohs(dq.dh->id); |
2b3eefc3 | 1472 | |
0beaa5c8 | 1473 | try { |
43234e76 RG |
1474 | /* we need an accurate ("real") value for the response and |
1475 | to store into the IDS, but not for insertion into the | |
1476 | rings for example */ | |
43234e76 RG |
1477 | struct timespec now; |
1478 | gettime(&now); | |
2efd427d | 1479 | |
2a28db86 | 1480 | if (!applyRulesToQuery(holders, dq, now)) { |
3e425868 | 1481 | return ProcessQueryResult::Drop; |
0beaa5c8 | 1482 | } |
b63add03 | 1483 | |
0beaa5c8 | 1484 | if(dq.dh->qr) { // something turned it into a response |
4ab01344 | 1485 | fixUpQueryTurnedResponse(dq, dq.origFlags); |
0beaa5c8 | 1486 | |
3e425868 RG |
1487 | if (!prepareOutgoingResponse(holders, cs, dq, false)) { |
1488 | return ProcessQueryResult::Drop; | |
22b2b326 | 1489 | } |
5f504638 | 1490 | |
3e425868 | 1491 | ++g_stats.selfAnswered; |
7fc95193 | 1492 | ++cs.responses; |
3e425868 | 1493 | return ProcessQueryResult::SendAnswer; |
0beaa5c8 RG |
1494 | } |
1495 | ||
2a28db86 | 1496 | std::shared_ptr<ServerPool> serverPool = getPool(*holders.pools, dq.poolname); |
4ab01344 | 1497 | dq.packetCache = serverPool->packetCache; |
a1b1a29d | 1498 | auto policy = *(holders.policy); |
0beaa5c8 | 1499 | if (serverPool->policy != nullptr) { |
a1b1a29d | 1500 | policy = *(serverPool->policy); |
0beaa5c8 | 1501 | } |
a1b1a29d RG |
1502 | auto servers = serverPool->getServers(); |
1503 | if (policy.isLua) { | |
0beaa5c8 | 1504 | std::lock_guard<std::mutex> lock(g_luamutex); |
3e425868 | 1505 | selectedBackend = policy.policy(servers, &dq); |
a1b1a29d RG |
1506 | } |
1507 | else { | |
3e425868 | 1508 | selectedBackend = policy.policy(servers, &dq); |
0beaa5c8 | 1509 | } |
228e4fe8 | 1510 | |
9837850d | 1511 | uint16_t cachedResponseSize = dq.size; |
3e425868 | 1512 | uint32_t allowExpired = selectedBackend ? 0 : g_staleCacheEntriesTTL; |
9837850d | 1513 | |
4ab01344 RG |
1514 | if (dq.packetCache && !dq.skipCache) { |
1515 | dq.dnssecOK = (getEDNSZ(dq) & EDNS_HEADER_FLAG_DO); | |
1ef18cab | 1516 | } |
1517 | ||
3e425868 | 1518 | if (dq.useECS && ((selectedBackend && selectedBackend->useECS) || (!selectedBackend && serverPool->getECS()))) { |
389d903a | 1519 | // we special case our cache in case a downstream explicitly gave us a universally valid response with a 0 scope |
2d0cefab RG |
1520 | // we need ECS parsing (parseECS) to be true so we can be sure that the initial incoming query did not have an existing |
1521 | // ECS option, which would make it unsuitable for the zero-scope feature. | |
3e425868 | 1522 | if (dq.packetCache && !dq.skipCache && (!selectedBackend || !selectedBackend->disableZeroScope) && dq.packetCache->isECSParsingEnabled()) { |
4ab01344 | 1523 | if (dq.packetCache->get(dq, dq.consumed, dq.dh->id, reinterpret_cast<char*>(dq.dh), &cachedResponseSize, &dq.cacheKeyNoECS, dq.subnet, dq.dnssecOK, allowExpired)) { |
3e425868 RG |
1524 | dq.len = cachedResponseSize; |
1525 | ||
1526 | if (!prepareOutgoingResponse(holders, cs, dq, true)) { | |
1527 | return ProcessQueryResult::Drop; | |
1528 | } | |
1529 | ||
1530 | return ProcessQueryResult::SendAnswer; | |
389d903a RG |
1531 | } |
1532 | ||
4ab01344 | 1533 | if (!dq.subnet) { |
389d903a | 1534 | /* there was no existing ECS on the query, enable the zero-scope feature */ |
4ab01344 | 1535 | dq.useZeroScope = true; |
389d903a | 1536 | } |
9837850d | 1537 | } |
389d903a | 1538 | |
be90d6bd | 1539 | if (!handleEDNSClientSubnet(dq, dq.ednsAdded, dq.ecsAdded, g_preserveTrailingData)) { |
4ab01344 | 1540 | vinfolog("Dropping query from %s because we couldn't insert the ECS value", dq.remote->toStringWithPort()); |
3e425868 | 1541 | return ProcessQueryResult::Drop; |
886e2cf2 | 1542 | } |
0beaa5c8 | 1543 | } |
886e2cf2 | 1544 | |
4ab01344 RG |
1545 | if (dq.packetCache && !dq.skipCache) { |
1546 | if (dq.packetCache->get(dq, dq.consumed, dq.dh->id, reinterpret_cast<char*>(dq.dh), &cachedResponseSize, &dq.cacheKey, dq.subnet, dq.dnssecOK, allowExpired)) { | |
3e425868 RG |
1547 | dq.len = cachedResponseSize; |
1548 | ||
1549 | if (!prepareOutgoingResponse(holders, cs, dq, true)) { | |
1550 | return ProcessQueryResult::Drop; | |
1551 | } | |
1552 | ||
1553 | return ProcessQueryResult::SendAnswer; | |
886e2cf2 | 1554 | } |
cb167afd | 1555 | ++g_stats.cacheMisses; |
0beaa5c8 | 1556 | } |
886e2cf2 | 1557 | |
3e425868 | 1558 | if(!selectedBackend) { |
cb167afd | 1559 | ++g_stats.noPolicy; |
26a3cdb7 | 1560 | |
348ef1c6 | 1561 | vinfolog("%s query for %s|%s from %s, no policy applied", g_servFailOnNoPolicy ? "ServFailed" : "Dropped", dq.qname->toLogString(), QType(dq.qtype).getName(), dq.remote->toStringWithPort()); |
3e425868 | 1562 | if (g_servFailOnNoPolicy) { |
4ab01344 | 1563 | restoreFlags(dq.dh, dq.origFlags); |
26a3cdb7 | 1564 | |
0beaa5c8 RG |
1565 | dq.dh->rcode = RCode::ServFail; |
1566 | dq.dh->qr = true; | |
26a3cdb7 | 1567 | |
3e425868 RG |
1568 | if (!prepareOutgoingResponse(holders, cs, dq, false)) { |
1569 | return ProcessQueryResult::Drop; | |
1570 | } | |
be6c318f | 1571 | // no response-only statistics counter to update. |
3e425868 | 1572 | return ProcessQueryResult::SendAnswer; |
1ea747c0 | 1573 | } |
3e425868 RG |
1574 | |
1575 | return ProcessQueryResult::Drop; | |
0beaa5c8 | 1576 | } |
1ea747c0 | 1577 | |
3e425868 RG |
1578 | if (dq.addXPF && selectedBackend->xpfRRCode != 0) { |
1579 | addXPF(dq, selectedBackend->xpfRRCode, g_preserveTrailingData); | |
5cc8371b RG |
1580 | } |
1581 | ||
3e425868 RG |
1582 | selectedBackend->queries++; |
1583 | return ProcessQueryResult::PassToBackend; | |
4ab01344 RG |
1584 | } |
1585 | catch(const std::exception& e){ | |
1586 | vinfolog("Got an error while parsing a %s query from %s, id %d: %s", (dq.tcp ? "TCP" : "UDP"), dq.remote->toStringWithPort(), queryId, e.what()); | |
4ab01344 | 1587 | } |
3e425868 | 1588 | return ProcessQueryResult::Drop; |
4ab01344 RG |
1589 | } |
1590 | ||
7bec330a | 1591 | static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest, char* query, uint16_t len, size_t queryBufferSize, struct mmsghdr* responsesVect, unsigned int* queuedResponses, struct iovec* respIOV, cmsgbuf_aligned* respCBuf) |
4ab01344 RG |
1592 | { |
1593 | assert(responsesVect == nullptr || (queuedResponses != nullptr && respIOV != nullptr && respCBuf != nullptr)); | |
1594 | uint16_t queryId = 0; | |
1595 | ||
1596 | try { | |
1597 | if (!isUDPQueryAcceptable(cs, holders, msgh, remote, dest)) { | |
1598 | return; | |
1599 | } | |
1600 | ||
1601 | /* we need an accurate ("real") value for the response and | |
1602 | to store into the IDS, but not for insertion into the | |
1603 | rings for example */ | |
1604 | struct timespec queryRealTime; | |
4ab01344 RG |
1605 | gettime(&queryRealTime, true); |
1606 | ||
1607 | std::shared_ptr<DNSCryptQuery> dnsCryptQuery = nullptr; | |
4ab01344 RG |
1608 | auto dnsCryptResponse = checkDNSCryptQuery(cs, query, len, dnsCryptQuery, queryRealTime.tv_sec, false); |
1609 | if (dnsCryptResponse) { | |
1610 | sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(dnsCryptResponse->data()), static_cast<uint16_t>(dnsCryptResponse->size()), 0, dest, remote); | |
1611 | return; | |
1612 | } | |
4ab01344 RG |
1613 | |
1614 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(query); | |
1615 | queryId = ntohs(dh->id); | |
1616 | ||
1617 | if (!checkQueryHeaders(dh)) { | |
1618 | return; | |
1619 | } | |
1620 | ||
1621 | uint16_t qtype, qclass; | |
1622 | unsigned int consumed = 0; | |
1623 | DNSName qname(query, len, sizeof(dnsheader), false, &qtype, &qclass, &consumed); | |
1624 | DNSQuestion dq(&qname, qtype, qclass, consumed, dest.sin4.sin_family != 0 ? &dest : &cs.local, &remote, dh, queryBufferSize, len, false, &queryRealTime); | |
1625 | dq.dnsCryptQuery = std::move(dnsCryptQuery); | |
3e425868 RG |
1626 | std::shared_ptr<DownstreamState> ss{nullptr}; |
1627 | auto result = processQuery(dq, cs, holders, ss); | |
4ab01344 | 1628 | |
3e425868 RG |
1629 | if (result == ProcessQueryResult::Drop) { |
1630 | return; | |
1631 | } | |
1632 | ||
1633 | if (result == ProcessQueryResult::SendAnswer) { | |
4ab01344 | 1634 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
3e425868 RG |
1635 | if (dq.delayMsec == 0 && responsesVect != nullptr) { |
1636 | queueResponse(cs, reinterpret_cast<char*>(dq.dh), dq.len, *dq.local, *dq.remote, responsesVect[*queuedResponses], respIOV, respCBuf); | |
4ab01344 RG |
1637 | (*queuedResponses)++; |
1638 | return; | |
1639 | } | |
1640 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
6d192f51 RG |
1641 | /* we use dest, always, because we don't want to use the listening address to send a response since it could be 0.0.0.0 */ |
1642 | sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(dq.dh), dq.len, dq.delayMsec, dest, *dq.remote); | |
3e425868 RG |
1643 | return; |
1644 | } | |
4ab01344 | 1645 | |
3e425868 | 1646 | if (result != ProcessQueryResult::PassToBackend || ss == nullptr) { |
4ab01344 RG |
1647 | return; |
1648 | } | |
11e1e08b | 1649 | |
0beaa5c8 RG |
1650 | unsigned int idOffset = (ss->idOffset++) % ss->idStates.size(); |
1651 | IDState* ids = &ss->idStates[idOffset]; | |
1652 | ids->age = 0; | |
0956c5c5 | 1653 | DOHUnit* du = nullptr; |
c9ba8478 | 1654 | |
9bd1a882 RG |
1655 | /* that means that the state was in use, possibly with an allocated |
1656 | DOHUnit that we will need to handle, but we can't touch it before | |
1657 | confirming that we now own this state */ | |
311f19d5 | 1658 | if (ids->isInUse()) { |
0956c5c5 RG |
1659 | du = ids->du; |
1660 | } | |
c9ba8478 | 1661 | |
a9489723 | 1662 | /* we atomically replace the value, we now own this state */ |
311f19d5 RG |
1663 | if (!ids->markAsUsed()) { |
1664 | /* the state was not in use. | |
9bd1a882 | 1665 | we reset 'du' because it might have still been in use when we read it. */ |
0956c5c5 | 1666 | du = nullptr; |
fbf14b03 | 1667 | ++ss->outstanding; |
71b86bd8 | 1668 | } |
0beaa5c8 | 1669 | else { |
9bd1a882 RG |
1670 | /* we are reusing a state, no change in outstanding but if there was an existing DOHUnit we need |
1671 | to handle it because it's about to be overwritten. */ | |
a9489723 | 1672 | ids->du = nullptr; |
fbf14b03 | 1673 | ++ss->reuseds; |
cb167afd | 1674 | ++g_stats.downstreamTimeouts; |
0956c5c5 | 1675 | handleDOHTimeout(du); |
0beaa5c8 | 1676 | } |
0e41337b | 1677 | |
0beaa5c8 | 1678 | ids->cs = &cs; |
a9489723 | 1679 | ids->origFD = cs.udpFD; |
0beaa5c8 | 1680 | ids->origID = dh->id; |
d0ae6360 | 1681 | setIDStateFromDNSQuestion(*ids, dq, std::move(qname)); |
0beaa5c8 RG |
1682 | |
1683 | /* If we couldn't harvest the real dest addr, still | |
1684 | write down the listening addr since it will be useful | |
1685 | (especially if it's not an 'any' one). | |
1686 | We need to keep track of which one it is since we may | |
1687 | want to use the real but not the listening addr to reply. | |
1688 | */ | |
1689 | if (dest.sin4.sin_family != 0) { | |
1690 | ids->origDest = dest; | |
1691 | ids->destHarvested = true; | |
1692 | } | |
1693 | else { | |
1694 | ids->origDest = cs.local; | |
1695 | ids->destHarvested = false; | |
1696 | } | |
7129b5c4 | 1697 | |
0beaa5c8 | 1698 | dh->id = idOffset; |
ca404e94 | 1699 | |
38069e7e | 1700 | int fd = pickBackendSocketForSending(ss); |
150105a2 | 1701 | ssize_t ret = udpClientSendRequestToBackend(ss, fd, query, dq.len); |
11e1e08b | 1702 | |
0beaa5c8 | 1703 | if(ret < 0) { |
fbf14b03 | 1704 | ++ss->sendErrors; |
cb167afd | 1705 | ++g_stats.downstreamSendErrors; |
0beaa5c8 | 1706 | } |
ca404e94 | 1707 | |
348ef1c6 | 1708 | vinfolog("Got query for %s|%s from %s, relayed to %s", ids->qname.toLogString(), QType(ids->qtype).getName(), remote.toStringWithPort(), ss->getName()); |
0beaa5c8 RG |
1709 | } |
1710 | catch(const std::exception& e){ | |
1711 | vinfolog("Got an error in UDP question thread while parsing a query from %s, id %d: %s", remote.toStringWithPort(), queryId, e.what()); | |
1712 | } | |
1713 | } | |
1714 | ||
1715 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) | |
1716 | static void MultipleMessagesUDPClientThread(ClientState* cs, LocalHolders& holders) | |
1717 | { | |
1718 | struct MMReceiver | |
1719 | { | |
8179b6d6 | 1720 | char packet[s_maxPacketCacheEntrySize]; |
0beaa5c8 RG |
1721 | ComboAddress remote; |
1722 | ComboAddress dest; | |
1723 | struct iovec iov; | |
392966bb OM |
1724 | /* used by HarvestDestinationAddress */ |
1725 | cmsgbuf_aligned cbuf; | |
0beaa5c8 RG |
1726 | }; |
1727 | const size_t vectSize = g_udpVectorSize; | |
1728 | /* the actual buffer is larger because: | |
1729 | - we may have to add EDNS and/or ECS | |
1730 | - we use it for self-generated responses (from rule or cache) | |
1731 | but we only accept incoming payloads up to that size | |
1732 | */ | |
1733 | static_assert(s_udpIncomingBufferSize <= sizeof(MMReceiver::packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)"); | |
1734 | ||
1735 | auto recvData = std::unique_ptr<MMReceiver[]>(new MMReceiver[vectSize]); | |
1736 | auto msgVec = std::unique_ptr<struct mmsghdr[]>(new struct mmsghdr[vectSize]); | |
1737 | auto outMsgVec = std::unique_ptr<struct mmsghdr[]>(new struct mmsghdr[vectSize]); | |
1738 | ||
1739 | /* initialize the structures needed to receive our messages */ | |
1740 | for (size_t idx = 0; idx < vectSize; idx++) { | |
1741 | recvData[idx].remote.sin4.sin_family = cs->local.sin4.sin_family; | |
7bec330a | 1742 | fillMSGHdr(&msgVec[idx].msg_hdr, &recvData[idx].iov, &recvData[idx].cbuf, sizeof(recvData[idx].cbuf), recvData[idx].packet, s_udpIncomingBufferSize, &recvData[idx].remote); |
0beaa5c8 RG |
1743 | } |
1744 | ||
1745 | /* go now */ | |
1746 | for(;;) { | |
1747 | ||
1748 | /* reset the IO vector, since it's also used to send the vector of responses | |
1749 | to avoid having to copy the data around */ | |
1750 | for (size_t idx = 0; idx < vectSize; idx++) { | |
1751 | recvData[idx].iov.iov_base = recvData[idx].packet; | |
1752 | recvData[idx].iov.iov_len = sizeof(recvData[idx].packet); | |
5f504638 | 1753 | } |
0beaa5c8 RG |
1754 | |
1755 | /* block until we have at least one message ready, but return | |
1756 | as many as possible to save the syscall costs */ | |
1757 | int msgsGot = recvmmsg(cs->udpFD, msgVec.get(), vectSize, MSG_WAITFORONE | MSG_TRUNC, nullptr); | |
1758 | ||
1759 | if (msgsGot <= 0) { | |
a702a96c | 1760 | vinfolog("Getting UDP messages via recvmmsg() failed with: %s", stringerror()); |
0beaa5c8 | 1761 | continue; |
773470ca | 1762 | } |
0beaa5c8 RG |
1763 | |
1764 | unsigned int msgsToSend = 0; | |
1765 | ||
1766 | /* process the received messages */ | |
1767 | for (int msgIdx = 0; msgIdx < msgsGot; msgIdx++) { | |
1768 | const struct msghdr* msgh = &msgVec[msgIdx].msg_hdr; | |
1769 | unsigned int got = msgVec[msgIdx].msg_len; | |
1770 | const ComboAddress& remote = recvData[msgIdx].remote; | |
1771 | ||
33d01bbd | 1772 | if (static_cast<size_t>(got) < sizeof(struct dnsheader)) { |
3e425868 | 1773 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1774 | continue; |
1775 | } | |
1776 | ||
7bec330a | 1777 | processUDPQuery(*cs, holders, msgh, remote, recvData[msgIdx].dest, recvData[msgIdx].packet, static_cast<uint16_t>(got), sizeof(recvData[msgIdx].packet), outMsgVec.get(), &msgsToSend, &recvData[msgIdx].iov, &recvData[msgIdx].cbuf); |
0beaa5c8 | 1778 | |
2b3eefc3 | 1779 | } |
0beaa5c8 RG |
1780 | |
1781 | /* immediate (not delayed or sent to a backend) responses (mostly from a rule, dynamic block | |
1782 | or the cache) can be sent in batch too */ | |
1783 | ||
1784 | if (msgsToSend > 0 && msgsToSend <= static_cast<unsigned int>(msgsGot)) { | |
1785 | int sent = sendmmsg(cs->udpFD, outMsgVec.get(), msgsToSend, 0); | |
1786 | ||
2b3eefc3 | 1787 | if (sent < 0 || static_cast<unsigned int>(sent) != msgsToSend) { |
a702a96c | 1788 | vinfolog("Error sending responses with sendmmsg() (%d on %u): %s", sent, msgsToSend, stringerror()); |
2b3eefc3 | 1789 | } |
2b3eefc3 | 1790 | } |
0beaa5c8 | 1791 | |
24d5cb00 | 1792 | } |
0beaa5c8 RG |
1793 | } |
1794 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
1795 | ||
1796 | // listens to incoming queries, sends out to downstream servers, noting the intended return path | |
9b73b71c | 1797 | static void udpClientThread(ClientState* cs) |
0beaa5c8 RG |
1798 | try |
1799 | { | |
519f5484 | 1800 | setThreadName("dnsdist/udpClie"); |
0beaa5c8 RG |
1801 | LocalHolders holders; |
1802 | ||
1803 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) | |
1804 | if (g_udpVectorSize > 1) { | |
1805 | MultipleMessagesUDPClientThread(cs, holders); | |
1806 | ||
1807 | } | |
1808 | else | |
1809 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
1810 | { | |
8179b6d6 | 1811 | char packet[s_maxPacketCacheEntrySize]; |
0beaa5c8 RG |
1812 | /* the actual buffer is larger because: |
1813 | - we may have to add EDNS and/or ECS | |
1814 | - we use it for self-generated responses (from rule or cache) | |
1815 | but we only accept incoming payloads up to that size | |
1816 | */ | |
1817 | static_assert(s_udpIncomingBufferSize <= sizeof(packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)"); | |
1818 | struct msghdr msgh; | |
1819 | struct iovec iov; | |
1820 | /* used by HarvestDestinationAddress */ | |
7bec330a | 1821 | cmsgbuf_aligned cbuf; |
0beaa5c8 RG |
1822 | |
1823 | ComboAddress remote; | |
1824 | ComboAddress dest; | |
1825 | remote.sin4.sin_family = cs->local.sin4.sin_family; | |
be7dec02 | 1826 | fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), packet, s_udpIncomingBufferSize, &remote); |
0beaa5c8 RG |
1827 | |
1828 | for(;;) { | |
1829 | ssize_t got = recvmsg(cs->udpFD, &msgh, 0); | |
1830 | ||
1831 | if (got < 0 || static_cast<size_t>(got) < sizeof(struct dnsheader)) { | |
cb167afd | 1832 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1833 | continue; |
1834 | } | |
1835 | ||
be7dec02 | 1836 | processUDPQuery(*cs, holders, &msgh, remote, dest, packet, static_cast<uint16_t>(got), sizeof(packet), nullptr, nullptr, nullptr, nullptr); |
0beaa5c8 RG |
1837 | } |
1838 | } | |
24d5cb00 | 1839 | } |
2b3eefc3 | 1840 | catch(const std::exception &e) |
a4652d55 | 1841 | { |
1842 | errlog("UDP client thread died because of exception: %s", e.what()); | |
a4652d55 | 1843 | } |
2b3eefc3 | 1844 | catch(const PDNSException &e) |
a4652d55 | 1845 | { |
1846 | errlog("UDP client thread died because of PowerDNS exception: %s", e.reason); | |
a4652d55 | 1847 | } |
1848 | catch(...) | |
1849 | { | |
1850 | errlog("UDP client thread died because of an exception: %s", "unknown"); | |
a4652d55 | 1851 | } |
24d5cb00 | 1852 | |
555970c9 RG |
/* Returns a DNS message ID picked in [0, 65535], using libsodium's
   randombytes_uniform() when HAVE_LIBSODIUM is defined and random() otherwise. */
uint16_t getRandomDNSID()
{
#ifdef HAVE_LIBSODIUM
  const auto candidate = randombytes_uniform(65536);
#else
  const auto candidate = random() % 65536;
#endif
  /* always below 65536, so the conversion does not lose anything */
  return static_cast<uint16_t>(candidate);
}
1861 | ||
/* maximum number of TCP client threads; also used to estimate the number of
   file descriptors this configuration may need */
uint64_t g_maxTCPClientThreads{10};
/* number of one-second maintenance ticks between two packet cache cleanups */
std::atomic<uint16_t> g_cacheCleaningDelay{60};
/* percentage used to compute how many entries a cache is purged down to:
   max entries * (100 - percentage) / 100 */
std::atomic<uint16_t> g_cacheCleaningPercentage{100};
e41f8165 | 1865 | |
9b73b71c | 1866 | void maintThread() |
886e2cf2 | 1867 | { |
519f5484 | 1868 | setThreadName("dnsdist/main"); |
886e2cf2 RG |
1869 | int interval = 1; |
1870 | size_t counter = 0; | |
5f3ea719 | 1871 | int32_t secondsToWaitLog = 0; |
886e2cf2 RG |
1872 | |
1873 | for(;;) { | |
1874 | sleep(interval); | |
1875 | ||
069e59db | 1876 | { |
1877 | std::lock_guard<std::mutex> lock(g_luamutex); | |
5f3ea719 PL |
1878 | auto f = g_lua.readVariable<boost::optional<std::function<void()> > >("maintenance"); |
1879 | if(f) { | |
1880 | try { | |
1881 | (*f)(); | |
1882 | secondsToWaitLog = 0; | |
1883 | } | |
1884 | catch(std::exception &e) { | |
1885 | if (secondsToWaitLog <= 0) { | |
1886 | infolog("Error during execution of maintenance function: %s", e.what()); | |
1887 | secondsToWaitLog = 61; | |
1888 | } | |
1889 | secondsToWaitLog -= interval; | |
1890 | } | |
1891 | } | |
069e59db | 1892 | } |
886e2cf2 RG |
1893 | |
1894 | counter++; | |
1895 | if (counter >= g_cacheCleaningDelay) { | |
c1b81381 RG |
1896 | /* keep track, for each cache, of whether we should keep |
1897 | expired entries */ | |
1898 | std::map<std::shared_ptr<DNSDistPacketCache>, bool> caches; | |
1899 | ||
1900 | /* gather all caches actually used by at least one pool, and see | |
1901 | if something prevents us from cleaning the expired entries */ | |
a9c2e4ab | 1902 | auto localPools = g_pools.getLocal(); |
a9c2e4ab | 1903 | for (const auto& entry : *localPools) { |
c1b81381 RG |
1904 | auto& pool = entry.second; |
1905 | ||
1906 | auto packetCache = pool->packetCache; | |
1907 | if (!packetCache) { | |
1908 | continue; | |
1909 | } | |
1910 | ||
1911 | auto pair = caches.insert({packetCache, false}); | |
1912 | auto& iter = pair.first; | |
1913 | /* if we need to keep stale data for this cache (ie, not clear | |
1914 | expired entries when at least one pool using this cache | |
1915 | has all its backends down) */ | |
1916 | if (packetCache->keepStaleData() && iter->second == false) { | |
1917 | /* so far all pools had at least one backend up */ | |
1918 | if (pool->countServers(true) == 0) { | |
1919 | iter->second = true; | |
1920 | } | |
886e2cf2 RG |
1921 | } |
1922 | } | |
c1b81381 RG |
1923 | |
1924 | for (auto pair : caches) { | |
1925 | /* shall we keep expired entries ? */ | |
1926 | if (pair.second == true) { | |
1927 | continue; | |
886e2cf2 | 1928 | } |
c1b81381 RG |
1929 | auto& packetCache = pair.first; |
1930 | size_t upTo = (packetCache->getMaxEntries()* (100 - g_cacheCleaningPercentage)) / 100; | |
1931 | packetCache->purgeExpired(upTo); | |
886e2cf2 RG |
1932 | } |
1933 | counter = 0; | |
1934 | } | |
1935 | ||
1936 | // ponder pruning g_dynblocks of expired entries here | |
1937 | } | |
886e2cf2 RG |
1938 | } |
1939 | ||
9b73b71c | 1940 | static void secPollThread() |
5d4e1ef8 RG |
1941 | { |
1942 | setThreadName("dnsdist/secpoll"); | |
1943 | ||
1944 | for (;;) { | |
1945 | try { | |
1946 | doSecPoll(g_secPollSuffix); | |
1947 | } | |
1948 | catch(...) { | |
1949 | } | |
1950 | sleep(g_secPollInterval); | |
1951 | } | |
1952 | } | |
1953 | ||
9b73b71c | 1954 | static void healthChecksThread() |
3ae86514 | 1955 | { |
519f5484 | 1956 | setThreadName("dnsdist/healthC"); |
77c9bc9a | 1957 | |
dd9c8246 | 1958 | static const int interval = 1; |
64e4ebb4 | 1959 | |
3ae86514 | 1960 | for(;;) { |
1961 | sleep(interval); | |
773470ca | 1962 | |
dd9c8246 | 1963 | if(g_tcpclientthreads->getQueuedCount() > 1 && !g_tcpclientthreads->hasReachedMaxThreads()) { |
a9bf3ec4 | 1964 | g_tcpclientthreads->addTCPClientThread(); |
dd9c8246 | 1965 | } |
3ae86514 | 1966 | |
dd9c8246 | 1967 | auto mplexer = std::shared_ptr<FDMultiplexer>(FDMultiplexer::getMultiplexerSilent()); |
a9c2e4ab RG |
1968 | auto states = g_dstates.getLocal(); // this points to the actual shared_ptrs! |
1969 | for(auto& dss : *states) { | |
dd9c8246 | 1970 | if(++dss->lastCheck < dss->checkInterval) { |
7c9bf18d | 1971 | continue; |
dd9c8246 | 1972 | } |
5d7e6765 | 1973 | |
dd9c8246 | 1974 | dss->lastCheck = 0; |
7565f4e6 | 1975 | |
dd9c8246 RG |
1976 | if (dss->availability == DownstreamState::Availability::Auto) { |
1977 | if (!queueHealthCheck(mplexer, dss)) { | |
1978 | updateHealthCheckResult(dss, false); | |
2993d58d | 1979 | } |
773470ca | 1980 | } |
b076b34a | 1981 | |
773470ca | 1982 | auto delta = dss->sw.udiffAndSet()/1000000.0; |
1983 | dss->queryLoad = 1.0*(dss->queries.load() - dss->prev.queries.load())/delta; | |
1984 | dss->dropRate = 1.0*(dss->reuseds.load() - dss->prev.reuseds.load())/delta; | |
3c115e0f | 1985 | dss->prev.queries.store(dss->queries.load()); |
773470ca | 1986 | dss->prev.reuseds.store(dss->reuseds.load()); |
3c115e0f | 1987 | |
dd9c8246 | 1988 | for (IDState& ids : dss->idStates) { // timeouts |
a9489723 | 1989 | int64_t usageIndicator = ids.usageIndicator; |
311f19d5 RG |
1990 | if(IDState::isInUse(usageIndicator) && ids.age++ > g_udpTimeout) { |
1991 | /* We mark the state as unused as soon as possible | |
51642fe3 RG |
1992 | to limit the risk of racing with the |
1993 | responder thread. | |
51642fe3 | 1994 | */ |
0956c5c5 RG |
1995 | auto oldDU = ids.du; |
1996 | ||
311f19d5 | 1997 | if (!ids.tryMarkUnused(usageIndicator)) { |
71b86bd8 RG |
1998 | /* this state has been altered in the meantime, |
1999 | don't go anywhere near it */ | |
2000 | continue; | |
2001 | } | |
fbf14b03 | 2002 | ids.du = nullptr; |
9bd1a882 | 2003 | handleDOHTimeout(oldDU); |
64e4ebb4 | 2004 | ids.age = 0; |
3c115e0f | 2005 | dss->reuseds++; |
2006 | --dss->outstanding; | |
cb167afd | 2007 | ++g_stats.downstreamTimeouts; // this is an 'actively' discovered timeout |
c08a5092 | 2008 | vinfolog("Had a downstream timeout from %s (%s) for query for %s|%s from %s", |
2009 | dss->remote.toStringWithPort(), dss->name, | |
348ef1c6 | 2010 | ids.qname.toLogString(), QType(ids.qtype).getName(), ids.origRemote.toStringWithPort()); |
51642fe3 | 2011 | |
c2fbeb27 | 2012 | struct timespec ts; |
85c7ca75 | 2013 | gettime(&ts); |
c2fbeb27 | 2014 | |
2d11d1b2 | 2015 | struct dnsheader fake; |
2016 | memset(&fake, 0, sizeof(fake)); | |
2017 | fake.id = ids.origID; | |
c2fbeb27 | 2018 | |
6d31c8b6 | 2019 | g_rings.insertResponse(ts, ids.origRemote, ids.qname, ids.qtype, std::numeric_limits<unsigned int>::max(), 0, fake, dss->remote); |
232f0877 | 2020 | } |
3ae86514 | 2021 | } |
2022 | } | |
dd9c8246 RG |
2023 | |
2024 | handleQueuedHealthChecks(mplexer); | |
3ae86514 | 2025 | } |
3ae86514 | 2026 | } |
2027 | ||
c2a42f97 | 2028 | static void bindAny(int af, int sock) |
2029 | { | |
18f8e493 | 2030 | __attribute__((unused)) int one = 1; |
c2a42f97 | 2031 | |
2032 | #ifdef IP_FREEBIND | |
2033 | if (setsockopt(sock, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) < 0) | |
a702a96c | 2034 | warnlog("Warning: IP_FREEBIND setsockopt failed: %s", stringerror()); |
c2a42f97 | 2035 | #endif |
2036 | ||
2037 | #ifdef IP_BINDANY | |
2038 | if (af == AF_INET) | |
2039 | if (setsockopt(sock, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) < 0) | |
a702a96c | 2040 | warnlog("Warning: IP_BINDANY setsockopt failed: %s", stringerror()); |
c2a42f97 | 2041 | #endif |
2042 | #ifdef IPV6_BINDANY | |
2043 | if (af == AF_INET6) | |
2044 | if (setsockopt(sock, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) < 0) | |
a702a96c | 2045 | warnlog("Warning: IPV6_BINDANY setsockopt failed: %s", stringerror()); |
c2a42f97 | 2046 | #endif |
2047 | #ifdef SO_BINDANY | |
2048 | if (setsockopt(sock, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) < 0) | |
a702a96c | 2049 | warnlog("Warning: SO_BINDANY setsockopt failed: %s", stringerror()); |
c2a42f97 | 2050 | #endif |
2051 | } | |
2052 | ||
a36ce055 RG |
2053 | static void dropGroupPrivs(gid_t gid) |
2054 | { | |
2055 | if (gid) { | |
2056 | if (setgid(gid) == 0) { | |
2057 | if (setgroups(0, NULL) < 0) { | |
a702a96c | 2058 | warnlog("Warning: Unable to drop supplementary gids: %s", stringerror()); |
a36ce055 RG |
2059 | } |
2060 | } | |
2061 | else { | |
a702a96c | 2062 | warnlog("Warning: Unable to set group ID to %d: %s", gid, stringerror()); |
a36ce055 RG |
2063 | } |
2064 | } | |
2065 | } | |
2066 | ||
2067 | static void dropUserPrivs(uid_t uid) | |
2068 | { | |
2069 | if(uid) { | |
2070 | if(setuid(uid) < 0) { | |
a702a96c | 2071 | warnlog("Warning: Unable to set user ID to %d: %s", uid, stringerror()); |
a36ce055 RG |
2072 | } |
2073 | } | |
2074 | } | |
2075 | ||
41408d3a RG |
2076 | static void checkFileDescriptorsLimits(size_t udpBindsCount, size_t tcpBindsCount) |
2077 | { | |
2078 | /* stdin, stdout, stderr */ | |
2079 | size_t requiredFDsCount = 3; | |
a9c2e4ab | 2080 | auto backends = g_dstates.getLocal(); |
cd73ceeb RG |
2081 | /* UDP sockets to backends */ |
2082 | size_t backendUDPSocketsCount = 0; | |
a9c2e4ab | 2083 | for (const auto& backend : *backends) { |
5bdbb83d | 2084 | backendUDPSocketsCount += backend->sockets.size(); |
cd73ceeb RG |
2085 | } |
2086 | requiredFDsCount += backendUDPSocketsCount; | |
2087 | /* TCP sockets to backends */ | |
a9c2e4ab | 2088 | requiredFDsCount += (backends->size() * g_maxTCPClientThreads); |
9fcd6adb | 2089 | /* listening sockets */ |
41408d3a RG |
2090 | requiredFDsCount += udpBindsCount; |
2091 | requiredFDsCount += tcpBindsCount; | |
2092 | /* max TCP connections currently served */ | |
2093 | requiredFDsCount += g_maxTCPClientThreads; | |
f2e29d04 | 2094 | /* max pipes for communicating between TCP acceptors and client threads */ |
41408d3a | 2095 | requiredFDsCount += (g_maxTCPClientThreads * 2); |
41408d3a | 2096 | /* max TCP queued connections */ |
9fcd6adb | 2097 | requiredFDsCount += g_maxTCPQueuedConnections; |
41408d3a RG |
2098 | /* DelayPipe pipe */ |
2099 | requiredFDsCount += 2; | |
2100 | /* syslog socket */ | |
2101 | requiredFDsCount++; | |
2102 | /* webserver main socket */ | |
2103 | requiredFDsCount++; | |
2104 | /* console main socket */ | |
2105 | requiredFDsCount++; | |
2106 | /* carbon export */ | |
2107 | requiredFDsCount++; | |
2108 | /* history file */ | |
2109 | requiredFDsCount++; | |
2110 | struct rlimit rl; | |
2111 | getrlimit(RLIMIT_NOFILE, &rl); | |
9fcd6adb | 2112 | if (rl.rlim_cur <= requiredFDsCount) { |
41408d3a RG |
2113 | warnlog("Warning, this configuration can use more than %d file descriptors, web server and console connections not included, and the current limit is %d.", std::to_string(requiredFDsCount), std::to_string(rl.rlim_cur)); |
2114 | #ifdef HAVE_SYSTEMD | |
2115 | warnlog("You can increase this value by using LimitNOFILE= in the systemd unit file or ulimit."); | |
2116 | #else | |
2117 | warnlog("You can increase this value by using ulimit."); | |
2118 | #endif | |
2119 | } | |
2120 | } | |
c2a42f97 | 2121 | |
6e9fd124 RG |
2122 | static void setUpLocalBind(std::unique_ptr<ClientState>& cs) |
2123 | { | |
2124 | /* skip some warnings if there is an identical UDP context */ | |
fbf14b03 | 2125 | bool warn = cs->tcp == false || cs->tlsFrontend != nullptr || cs->dohFrontend != nullptr; |
6e9fd124 RG |
2126 | int& fd = cs->tcp == false ? cs->udpFD : cs->tcpFD; |
2127 | (void) warn; | |
2128 | ||
2129 | fd = SSocket(cs->local.sin4.sin_family, cs->tcp == false ? SOCK_DGRAM : SOCK_STREAM, 0); | |
2130 | ||
2131 | if (cs->tcp) { | |
2132 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); | |
2133 | #ifdef TCP_DEFER_ACCEPT | |
2134 | SSetsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, 1); | |
2135 | #endif | |
2136 | if (cs->fastOpenQueueSize > 0) { | |
2137 | #ifdef TCP_FASTOPEN | |
2138 | SSetsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, cs->fastOpenQueueSize); | |
2139 | #else | |
2140 | if (warn) { | |
2141 | warnlog("TCP Fast Open has been configured on local address '%s' but is not supported", cs->local.toStringWithPort()); | |
2142 | } | |
2143 | #endif | |
2144 | } | |
2145 | } | |
2146 | ||
2147 | if(cs->local.sin4.sin_family == AF_INET6) { | |
2148 | SSetsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, 1); | |
2149 | } | |
2150 | ||
2151 | bindAny(cs->local.sin4.sin_family, fd); | |
2152 | ||
2153 | if(!cs->tcp && IsAnyAddress(cs->local)) { | |
2154 | int one=1; | |
2155 | setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems | |
2156 | #ifdef IPV6_RECVPKTINFO | |
2157 | setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); | |
2158 | #endif | |
2159 | } | |
2160 | ||
2161 | if (cs->reuseport) { | |
2162 | #ifdef SO_REUSEPORT | |
2163 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1); | |
2164 | #else | |
2165 | if (warn) { | |
2166 | /* no need to warn again if configured but support is not available, we already did for UDP */ | |
2167 | warnlog("SO_REUSEPORT has been configured on local address '%s' but is not supported", cs->local.toStringWithPort()); | |
2168 | } | |
2169 | #endif | |
2170 | } | |
2171 | ||
90f9fbc0 RG |
2172 | if (!cs->tcp) { |
2173 | if (cs->local.isIPv4()) { | |
2174 | try { | |
2175 | setSocketIgnorePMTU(cs->udpFD); | |
2176 | } | |
2177 | catch(const std::exception& e) { | |
2178 | warnlog("Failed to set IP_MTU_DISCOVER on UDP server socket for local address '%s': %s", cs->local.toStringWithPort(), e.what()); | |
2179 | } | |
2180 | } | |
2181 | } | |
2182 | ||
6e9fd124 RG |
2183 | const std::string& itf = cs->interface; |
2184 | if (!itf.empty()) { | |
2185 | #ifdef SO_BINDTODEVICE | |
2186 | int res = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, itf.c_str(), itf.length()); | |
2187 | if (res != 0) { | |
a702a96c | 2188 | warnlog("Error setting up the interface on local address '%s': %s", cs->local.toStringWithPort(), stringerror()); |
6e9fd124 RG |
2189 | } |
2190 | #else | |
2191 | if (warn) { | |
2192 | warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", cs->local.toStringWithPort()); | |
2193 | } | |
2194 | #endif | |
2195 | } | |
2196 | ||
2197 | #ifdef HAVE_EBPF | |
2198 | if (g_defaultBPFFilter) { | |
2199 | cs->attachFilter(g_defaultBPFFilter); | |
2200 | vinfolog("Attaching default BPF Filter to %s frontend %s", (!cs->tcp ? "UDP" : "TCP"), cs->local.toStringWithPort()); | |
2201 | } | |
2202 | #endif /* HAVE_EBPF */ | |
2203 | ||
2204 | if (cs->tlsFrontend != nullptr) { | |
2205 | if (!cs->tlsFrontend->setupTLS()) { | |
2206 | errlog("Error while setting up TLS on local address '%s', exiting", cs->local.toStringWithPort()); | |
2207 | _exit(EXIT_FAILURE); | |
2208 | } | |
2209 | } | |
2210 | ||
fbf14b03 RG |
2211 | if (cs->dohFrontend != nullptr) { |
2212 | cs->dohFrontend->setup(); | |
2213 | } | |
2214 | ||
6e9fd124 RG |
2215 | SBind(fd, cs->local); |
2216 | ||
2217 | if (cs->tcp) { | |
fbf14b03 | 2218 | SListen(cs->tcpFD, SOMAXCONN); |
6e9fd124 RG |
2219 | if (cs->tlsFrontend != nullptr) { |
2220 | warnlog("Listening on %s for TLS", cs->local.toStringWithPort()); | |
2221 | } | |
fbf14b03 RG |
2222 | else if (cs->dohFrontend != nullptr) { |
2223 | warnlog("Listening on %s for DoH", cs->local.toStringWithPort()); | |
2224 | } | |
6e9fd124 RG |
2225 | else if (cs->dnscryptCtx != nullptr) { |
2226 | warnlog("Listening on %s for DNSCrypt", cs->local.toStringWithPort()); | |
2227 | } | |
2228 | else { | |
2229 | warnlog("Listening on %s", cs->local.toStringWithPort()); | |
2230 | } | |
2231 | } | |
2232 | ||
2233 | cs->ready = true; | |
2234 | } | |
2235 | ||
/* Values gathered from the command line by main() while parsing options. */
struct
{
  std::vector<std::string> locals;  /* addresses passed with -l/--local */
  std::vector<std::string> remotes; /* positional arguments when not acting as a client */
  bool checkConfig = false;         /* --check-config was given */
  bool beClient = false;            /* -c/--client was given */
  bool beSupervised = false;        /* --supervised was given */
  std::string command;              /* argument of -e/--execute */
  std::string config;               /* argument of -C/--config */
  std::string uid;                  /* argument of -u/--uid */
  std::string gid;                  /* argument of -g/--gid */
} g_cmdLine;

/* Flipped to true by main() after the list of frontends has been
   finalized, right before the local binds are set up. */
std::atomic<bool> g_configurationDone(false);
520eb5a0 | 2250 | |
7d3ee2bb PL |
/* Print the command-line help (synopsis followed by one entry per
   option) to standard output. The -k/--setkey entry is only listed
   when libsodium support has been compiled in. */
static void usage()
{
  static const char* const helpLines[] = {
    "Syntax: dnsdist [-C,--config file] [-c,--client [IP[:PORT]]]\n",
    "[-e,--execute cmd] [-h,--help] [-l,--local addr]\n",
    "[-v,--verbose] [--check-config] [--version]\n",
    "\n",
    "-a,--acl netmask Add this netmask to the ACL\n",
    "-C,--config file Load configuration from 'file'\n",
    "-c,--client Operate as a client, connect to dnsdist. This reads\n",
    " controlSocket from your configuration file, but also\n",
    " accepts an IP:PORT argument\n",
#ifdef HAVE_LIBSODIUM
    "-k,--setkey KEY Use KEY for encrypted communication to dnsdist. This\n",
    " is similar to setting setKey in the configuration file.\n",
    " NOTE: this will leak this key in your shell's history\n",
    " and in the systems running process list.\n",
#endif
    "--check-config Validate the configuration file and exit. The exit-code\n",
    " reflects the validation, 0 is OK, 1 means an error.\n",
    " Any errors are printed as well.\n",
    "-e,--execute cmd Connect to dnsdist and execute 'cmd'\n",
    "-g,--gid gid Change the process group ID after binding sockets\n",
    "-h,--help Display this helpful message\n",
    "-l,--local address Listen on this local address\n",
    "--supervised Don't open a console, I'm supervised\n",
    " (use with e.g. systemd and daemontools)\n",
    "--disable-syslog Don't log to syslog, only to stdout\n",
    " (use with e.g. systemd)\n",
    "-u,--uid uid Change the process user ID after binding sockets\n",
    "-v,--verbose Enable verbose mode\n",
    "-V,--version Show dnsdist version information and exit\n",
  };

  /* leading empty line, flushed as before */
  std::cout << std::endl;
  for (const char* helpLine : helpLines) {
    std::cout << helpLine;
  }
}
2284 | ||
24d5cb00 | 2285 | int main(int argc, char** argv) |
2286 | try | |
2287 | { | |
41408d3a RG |
2288 | size_t udpBindsCount = 0; |
2289 | size_t tcpBindsCount = 0; | |
94721140 | 2290 | rl_attempted_completion_function = my_completion; |
2291 | rl_completion_append_character = 0; | |
2292 | ||
726ddf60 | 2293 | signal(SIGPIPE, SIG_IGN); |
6d01c80c | 2294 | signal(SIGCHLD, SIG_IGN); |
0ca6a67f | 2295 | openlog("dnsdist", LOG_PID|LOG_NDELAY, LOG_DAEMON); |
6d01c80c | 2296 | |
0b62ec78 | 2297 | #ifdef HAVE_LIBSODIUM |
6d01c80c | 2298 | if (sodium_init() == -1) { |
2299 | cerr<<"Unable to initialize crypto library"<<endl; | |
2300 | exit(EXIT_FAILURE); | |
2301 | } | |
7691e7df | 2302 | g_hashperturb=randombytes_uniform(0xffffffff); |
2303 | srandom(randombytes_uniform(0xffffffff)); | |
2304 | #else | |
2305 | { | |
2306 | struct timeval tv; | |
2307 | gettimeofday(&tv, 0); | |
2308 | srandom(tv.tv_sec ^ tv.tv_usec ^ getpid()); | |
2309 | g_hashperturb=random(); | |
2310 | } | |
2311 | ||
0b62ec78 | 2312 | #endif |
094b6aff | 2313 | ComboAddress clientAddress = ComboAddress(); |
11058bd6 | 2314 | g_cmdLine.config=SYSCONFDIR "/dnsdist.conf"; |
359bdba5 | 2315 | struct option longopts[]={ |
8f2d5ec3 | 2316 | {"acl", required_argument, 0, 'a'}, |
359bdba5 CH |
2317 | {"check-config", no_argument, 0, 1}, |
2318 | {"client", no_argument, 0, 'c'}, | |
7cc68f53 | 2319 | {"config", required_argument, 0, 'C'}, |
359bdba5 | 2320 | {"disable-syslog", no_argument, 0, 2}, |
7cc68f53 | 2321 | {"execute", required_argument, 0, 'e'}, |
359bdba5 CH |
2322 | {"gid", required_argument, 0, 'g'}, |
2323 | {"help", no_argument, 0, 'h'}, | |
2324 | {"local", required_argument, 0, 'l'}, | |
359bdba5 | 2325 | {"setkey", required_argument, 0, 'k'}, |
359bdba5 CH |
2326 | {"supervised", no_argument, 0, 3}, |
2327 | {"uid", required_argument, 0, 'u'}, | |
2328 | {"verbose", no_argument, 0, 'v'}, | |
2329 | {"version", no_argument, 0, 'V'}, | |
2330 | {0,0,0,0} | |
7cc68f53 | 2331 | }; |
2332 | int longindex=0; | |
8f2d5ec3 | 2333 | string optstring; |
7cc68f53 | 2334 | for(;;) { |
359bdba5 | 2335 | int c=getopt_long(argc, argv, "a:cC:e:g:hk:l:u:vV", longopts, &longindex); |
7cc68f53 | 2336 | if(c==-1) |
2337 | break; | |
2338 | switch(c) { | |
5efcfa63 PL |
2339 | case 1: |
2340 | g_cmdLine.checkConfig=true; | |
2341 | break; | |
bbfaaa6f PL |
2342 | case 2: |
2343 | g_syslog=false; | |
2344 | break; | |
b7165327 CH |
2345 | case 3: |
2346 | g_cmdLine.beSupervised=true; | |
2347 | break; | |
7cc68f53 | 2348 | case 'C': |
2349 | g_cmdLine.config=optarg; | |
2350 | break; | |
2351 | case 'c': | |
2352 | g_cmdLine.beClient=true; | |
2353 | break; | |
7cc68f53 | 2354 | case 'e': |
2355 | g_cmdLine.command=optarg; | |
2356 | break; | |
a36ce055 RG |
2357 | case 'g': |
2358 | g_cmdLine.gid=optarg; | |
2359 | break; | |
7cc68f53 | 2360 | case 'h': |
6306c282 | 2361 | cout<<"dnsdist "<<VERSION<<endl; |
7d3ee2bb | 2362 | usage(); |
7cc68f53 | 2363 | cout<<"\n"; |
2364 | exit(EXIT_SUCCESS); | |
2365 | break; | |
8f2d5ec3 | 2366 | case 'a': |
2367 | optstring=optarg; | |
2368 | g_ACL.modify([optstring](NetmaskGroup& nmg) { nmg.addMask(optstring); }); | |
2369 | break; | |
ddb14ec9 | 2370 | case 'k': |
b4b5edbd | 2371 | #ifdef HAVE_LIBSODIUM |
b5521206 | 2372 | if (B64Decode(string(optarg), g_consoleKey) < 0) { |
ddb14ec9 PL |
2373 | cerr<<"Unable to decode key '"<<optarg<<"'."<<endl; |
2374 | exit(EXIT_FAILURE); | |
2375 | } | |
b4b5edbd CH |
2376 | #else |
2377 | cerr<<"dnsdist has been built without libsodium, -k/--setkey is unsupported."<<endl; | |
2378 | exit(EXIT_FAILURE); | |
ddb14ec9 | 2379 | #endif |
b4b5edbd | 2380 | break; |
7cc68f53 | 2381 | case 'l': |
6a363878 | 2382 | g_cmdLine.locals.push_back(trim_copy(string(optarg))); |
7cc68f53 | 2383 | break; |
a36ce055 RG |
2384 | case 'u': |
2385 | g_cmdLine.uid=optarg; | |
2386 | break; | |
7cc68f53 | 2387 | case 'v': |
2388 | g_verbose=true; | |
2389 | break; | |
6306c282 | 2390 | case 'V': |
d4d796e5 PD |
2391 | #ifdef LUAJIT_VERSION |
2392 | cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<" ["<<LUAJIT_VERSION<<"])"<<endl; | |
2393 | #else | |
2394 | cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<")"<<endl; | |
2395 | #endif | |
70829d97 | 2396 | cout<<"Enabled features: "; |
90fe8ae6 RG |
2397 | #ifdef HAVE_CDB |
2398 | cout<<"cdb "; | |
2399 | #endif | |
a227f47d RG |
2400 | #ifdef HAVE_DNS_OVER_TLS |
2401 | cout<<"dns-over-tls("; | |
2402 | #ifdef HAVE_GNUTLS | |
3909bf10 CH |
2403 | cout<<"gnutls"; |
2404 | #ifdef HAVE_LIBSSL | |
2405 | cout<<" "; | |
2406 | #endif | |
a227f47d RG |
2407 | #endif |
2408 | #ifdef HAVE_LIBSSL | |
2409 | cout<<"openssl"; | |
2410 | #endif | |
2411 | cout<<") "; | |
2412 | #endif | |
fbf14b03 RG |
2413 | #ifdef HAVE_DNS_OVER_HTTPS |
2414 | cout<<"dns-over-https(DOH) "; | |
2415 | #endif | |
70829d97 PL |
2416 | #ifdef HAVE_DNSCRYPT |
2417 | cout<<"dnscrypt "; | |
2418 | #endif | |
0beaa5c8 RG |
2419 | #ifdef HAVE_EBPF |
2420 | cout<<"ebpf "; | |
2421 | #endif | |
82a91ddf CH |
2422 | #ifdef HAVE_FSTRM |
2423 | cout<<"fstrm "; | |
2424 | #endif | |
f4b1f1fd RG |
2425 | #ifdef HAVE_LIBCRYPTO |
2426 | cout<<"ipcipher "; | |
2427 | #endif | |
3909bf10 CH |
2428 | #ifdef HAVE_LIBSODIUM |
2429 | cout<<"libsodium "; | |
2430 | #endif | |
f441962a RG |
2431 | #ifdef HAVE_LMDB |
2432 | cout<<"lmdb "; | |
2433 | #endif | |
70829d97 PL |
2434 | #ifdef HAVE_PROTOBUF |
2435 | cout<<"protobuf "; | |
2436 | #endif | |
2437 | #ifdef HAVE_RE2 | |
2438 | cout<<"re2 "; | |
2439 | #endif | |
0beaa5c8 RG |
2440 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
2441 | cout<<"recvmmsg/sendmmsg "; | |
2442 | #endif | |
2443 | #ifdef HAVE_NET_SNMP | |
2444 | cout<<"snmp "; | |
2445 | #endif | |
70829d97 PL |
2446 | #ifdef HAVE_SYSTEMD |
2447 | cout<<"systemd"; | |
2448 | #endif | |
2449 | cout<<endl; | |
6306c282 PL |
2450 | exit(EXIT_SUCCESS); |
2451 | break; | |
7d3ee2bb PL |
2452 | case '?': |
2453 | //getopt_long printed an error message. | |
2454 | usage(); | |
2455 | exit(EXIT_FAILURE); | |
2456 | break; | |
7cc68f53 | 2457 | } |
24d5cb00 | 2458 | } |
6ab65223 | 2459 | |
7cc68f53 | 2460 | argc-=optind; |
2461 | argv+=optind; | |
2462 | for(auto p = argv; *p; ++p) { | |
094b6aff PL |
2463 | if(g_cmdLine.beClient) { |
2464 | clientAddress = ComboAddress(*p, 5199); | |
2465 | } else { | |
2466 | g_cmdLine.remotes.push_back(*p); | |
2467 | } | |
7cc68f53 | 2468 | } |
2469 | ||
a1b1a29d | 2470 | ServerPolicy leastOutstandingPol{"leastOutstanding", leastOutstanding, false}; |
cd29dcb1 | 2471 | |
e5a14b2b | 2472 | g_policy.setState(leastOutstandingPol); |
7cc68f53 | 2473 | if(g_cmdLine.beClient || !g_cmdLine.command.empty()) { |
203b5348 | 2474 | setupLua(true, false, g_cmdLine.config); |
094b6aff PL |
2475 | if (clientAddress != ComboAddress()) |
2476 | g_serverControl = clientAddress; | |
7cc68f53 | 2477 | doClient(g_serverControl, g_cmdLine.command); |
e16fd59c | 2478 | _exit(EXIT_SUCCESS); |
6d01c80c | 2479 | } |
2e72cc0e | 2480 | |
8f133915 | 2481 | auto acl = g_ACL.getCopy(); |
8f2d5ec3 | 2482 | if(acl.empty()) { |
2483 | for(auto& addr : {"127.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "169.254.0.0/16", "192.168.0.0/16", "172.16.0.0/12", "::1/128", "fc00::/7", "fe80::/10"}) | |
2484 | acl.addMask(addr); | |
2485 | g_ACL.setState(acl); | |
2486 | } | |
8f133915 | 2487 | |
b5521206 | 2488 | auto consoleACL = g_consoleACL.getCopy(); |
5ceea33e RG |
2489 | for (const auto& mask : { "127.0.0.1/8", "::1/128" }) { |
2490 | consoleACL.addMask(mask); | |
2491 | } | |
b5521206 RG |
2492 | g_consoleACL.setState(consoleACL); |
2493 | ||
5efcfa63 | 2494 | if (g_cmdLine.checkConfig) { |
203b5348 | 2495 | setupLua(false, true, g_cmdLine.config); |
5efcfa63 PL |
2496 | // No exception was thrown |
2497 | infolog("Configuration '%s' OK!", g_cmdLine.config); | |
d8c19b98 | 2498 | _exit(EXIT_SUCCESS); |
5efcfa63 PL |
2499 | } |
2500 | ||
203b5348 | 2501 | auto todo=setupLua(false, false, g_cmdLine.config); |
2e72cc0e | 2502 | |
636cc544 CHB |
2503 | auto localPools = g_pools.getCopy(); |
2504 | { | |
2505 | bool precompute = false; | |
2506 | if (g_policy.getLocal()->name == "chashed") { | |
2507 | precompute = true; | |
2508 | } else { | |
2509 | for (const auto& entry: localPools) { | |
2510 | if (entry.second->policy != nullptr && entry.second->policy->name == "chashed") { | |
2511 | precompute = true; | |
2512 | break ; | |
2513 | } | |
2514 | } | |
2515 | } | |
2516 | if (precompute) { | |
2517 | vinfolog("Pre-computing hashes for consistent hash load-balancing policy"); | |
2518 | // pre compute hashes | |
2519 | auto backends = g_dstates.getLocal(); | |
2520 | for (auto& backend: *backends) { | |
2521 | backend->hash(); | |
2522 | } | |
d58e616a CHB |
2523 | } |
2524 | } | |
2525 | ||
6e9fd124 RG |
2526 | if (!g_cmdLine.locals.empty()) { |
2527 | for (auto it = g_frontends.begin(); it != g_frontends.end(); ) { | |
fbf14b03 RG |
2528 | /* DoH, DoT and DNSCrypt frontends are separate */ |
2529 | if ((*it)->dohFrontend == nullptr && (*it)->tlsFrontend == nullptr && (*it)->dnscryptCtx == nullptr) { | |
6e9fd124 | 2530 | it = g_frontends.erase(it); |
9f67b883 | 2531 | } |
6e9fd124 RG |
2532 | else { |
2533 | ++it; | |
9f67b883 | 2534 | } |
9f67b883 RG |
2535 | } |
2536 | ||
6e9fd124 RG |
2537 | for(const auto& loc : g_cmdLine.locals) { |
2538 | /* UDP */ | |
2539 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress(loc, 53), false, false, 0, "", {}))); | |
2540 | /* TCP */ | |
2541 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress(loc, 53), true, false, 0, "", {}))); | |
87b515ed | 2542 | } |
a36ce055 RG |
2543 | } |
2544 | ||
6e9fd124 RG |
2545 | if (g_frontends.empty()) { |
2546 | /* UDP */ | |
2547 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress("127.0.0.1", 53), false, false, 0, "", {}))); | |
2548 | /* TCP */ | |
2549 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress("127.0.0.1", 53), true, false, 0, "", {}))); | |
11e1e08b | 2550 | } |
11e1e08b | 2551 | |
6e9fd124 | 2552 | g_configurationDone = true; |
a227f47d | 2553 | |
6e9fd124 RG |
2554 | for(auto& frontend : g_frontends) { |
2555 | setUpLocalBind(frontend); | |
a227f47d | 2556 | |
6e9fd124 RG |
2557 | if (frontend->tcp == false) { |
2558 | ++udpBindsCount; | |
a227f47d RG |
2559 | } |
2560 | else { | |
6e9fd124 | 2561 | ++tcpBindsCount; |
a227f47d RG |
2562 | } |
2563 | } | |
2564 | ||
b82a127e | 2565 | warnlog("dnsdist %s comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it according to the terms of the GPL version 2", VERSION); |
9c9b4998 | 2566 | |
b82a127e RG |
2567 | vector<string> vec; |
2568 | std::string acls; | |
2569 | g_ACL.getLocal()->toStringVector(&vec); | |
2570 | for(const auto& s : vec) { | |
2571 | if (!acls.empty()) | |
2572 | acls += ", "; | |
2573 | acls += s; | |
b076b34a | 2574 | } |
b82a127e | 2575 | infolog("ACL allowing queries from: %s", acls.c_str()); |
b5521206 RG |
2576 | vec.clear(); |
2577 | acls.clear(); | |
2578 | g_consoleACL.getLocal()->toStringVector(&vec); | |
2579 | for (const auto& entry : vec) { | |
2580 | if (!acls.empty()) { | |
2581 | acls += ", "; | |
2582 | } | |
2583 | acls += entry; | |
2584 | } | |
2585 | infolog("Console ACL allowing connections from: %s", acls.c_str()); | |
6d01c80c | 2586 | |
9c9b4998 RG |
2587 | #ifdef HAVE_LIBSODIUM |
2588 | if (g_consoleEnabled && g_consoleKey.empty()) { | |
2589 | warnlog("Warning, the console has been enabled via 'controlSocket()' but no key has been set with 'setKey()' so all connections will fail until a key has been set"); | |
2590 | } | |
2591 | #endif | |
2592 | ||
a65aec1f PL |
2593 | uid_t newgid=getegid(); |
2594 | gid_t newuid=geteuid(); | |
aac59883 RG |
2595 | |
2596 | if(!g_cmdLine.gid.empty()) | |
2597 | newgid = strToGID(g_cmdLine.gid.c_str()); | |
2598 | ||
2599 | if(!g_cmdLine.uid.empty()) | |
2600 | newuid = strToUID(g_cmdLine.uid.c_str()); | |
2601 | ||
d2138f20 PL |
2602 | if (getegid() != newgid) { |
2603 | if (running_in_service_mgr()) { | |
2604 | errlog("--gid/-g set on command-line, but dnsdist was started as a systemd service. Use the 'Group' setting in the systemd unit file to set the group to run as"); | |
2605 | _exit(EXIT_FAILURE); | |
2606 | } | |
a65aec1f | 2607 | dropGroupPrivs(newgid); |
d2138f20 PL |
2608 | } |
2609 | ||
2610 | if (geteuid() != newuid) { | |
2611 | if (running_in_service_mgr()) { | |
2612 | errlog("--uid/-u set on command-line, but dnsdist was started as a systemd service. Use the 'User' setting in the systemd unit file to set the user to run as"); | |
2613 | _exit(EXIT_FAILURE); | |
2614 | } | |
a65aec1f | 2615 | dropUserPrivs(newuid); |
d2138f20 | 2616 | } |
a65aec1f | 2617 | |
fdc3ea42 RG |
2618 | try { |
2619 | /* we might still have capabilities remaining, | |
2620 | for example if we have been started as root | |
2621 | without --uid or --gid (please don't do that) | |
2622 | or as an unprivileged user with ambient | |
2623 | capabilities like CAP_NET_BIND_SERVICE. | |
2624 | */ | |
83fe2c55 | 2625 | dropCapabilities(g_capabilitiesToRetain); |
fdc3ea42 RG |
2626 | } |
2627 | catch(const std::exception& e) { | |
2628 | warnlog("%s", e.what()); | |
2629 | } | |
aac59883 | 2630 | |
a36ce055 RG |
2631 | /* this need to be done _after_ dropping privileges */ |
2632 | g_delay = new DelayPipe<DelayedPacket>(); | |
2633 | ||
9f4eb5cc RG |
2634 | if (g_snmpAgent) { |
2635 | g_snmpAgent->run(); | |
2636 | } | |
2637 | ||
1f7646c2 | 2638 | g_tcpclientthreads = std::unique_ptr<TCPClientCollection>(new TCPClientCollection(g_maxTCPClientThreads, g_useTCPSinglePipe)); |
a9bf3ec4 | 2639 | |
2e72cc0e | 2640 | for(auto& t : todo) |
2641 | t(); | |
2642 | ||
636cc544 | 2643 | localPools = g_pools.getCopy(); |
8f4f5186 RG |
2644 | /* create the default pool no matter what */ |
2645 | createPoolIfNotExists(localPools, ""); | |
7cc68f53 | 2646 | if(g_cmdLine.remotes.size()) { |
2647 | for(const auto& address : g_cmdLine.remotes) { | |
c9262563 | 2648 | auto ret=std::make_shared<DownstreamState>(ComboAddress(address, 53)); |
886e2cf2 | 2649 | addServerToPool(localPools, "", ret); |
5d7e6765 | 2650 | if (ret->connected && !ret->threadStarted.test_and_set()) { |
2717a92f | 2651 | ret->tid = thread(responderThread, ret); |
7565f4e6 | 2652 | } |
ecbe9133 | 2653 | g_dstates.modify([ret](servers_t& servers) { servers.push_back(ret); }); |
6d01c80c | 2654 | } |
2655 | } | |
886e2cf2 | 2656 | g_pools.setState(localPools); |
6d01c80c | 2657 | |
a9c2e4ab | 2658 | if(g_dstates.getLocal()->empty()) { |
e73ec7d3 | 2659 | errlog("No downstream servers defined: all packets will get dropped"); |
2660 | // you might define them later, but you need to know | |
2661 | } | |
2662 | ||
41408d3a RG |
2663 | checkFileDescriptorsLimits(udpBindsCount, tcpBindsCount); |
2664 | ||
dd9c8246 | 2665 | auto mplexer = std::shared_ptr<FDMultiplexer>(FDMultiplexer::getMultiplexerSilent()); |
e5a14b2b | 2666 | for(auto& dss : g_dstates.getCopy()) { // it is a copy, but the internal shared_ptrs are the real deal |
dd9c8246 RG |
2667 | if (dss->availability == DownstreamState::Availability::Auto) { |
2668 | if (!queueHealthCheck(mplexer, dss, true)) { | |
2669 | dss->upStatus = false; | |
2670 | warnlog("Marking downstream %s as 'down'", dss->getNameWithAddr()); | |
2671 | } | |
773470ca | 2672 | } |
2673 | } | |
dd9c8246 | 2674 | handleQueuedHealthChecks(mplexer, true); |
b076b34a | 2675 | |
6e9fd124 | 2676 | for(auto& cs : g_frontends) { |
fbf14b03 RG |
2677 | if (cs->dohFrontend != nullptr) { |
2678 | #ifdef HAVE_DNS_OVER_HTTPS | |
2679 | std::thread t1(dohThread, cs.get()); | |
2b0cb8f8 RG |
2680 | if (!cs->cpus.empty()) { |
2681 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2682 | } | |
fbf14b03 RG |
2683 | t1.detach(); |
2684 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
2685 | continue; | |
2686 | } | |
a36ce055 | 2687 | if (cs->udpFD >= 0) { |
6e9fd124 | 2688 | thread t1(udpClientThread, cs.get()); |
f0e4dcba RG |
2689 | if (!cs->cpus.empty()) { |
2690 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2691 | } | |
a36ce055 | 2692 | t1.detach(); |
652a7355 | 2693 | } |
a36ce055 | 2694 | else if (cs->tcpFD >= 0) { |
6e9fd124 | 2695 | thread t1(tcpAcceptorThread, cs.get()); |
f0e4dcba RG |
2696 | if (!cs->cpus.empty()) { |
2697 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2698 | } | |
a36ce055 | 2699 | t1.detach(); |
726ddf60 | 2700 | } |
24d5cb00 | 2701 | } |
7730131a | 2702 | |
42fae326 | 2703 | thread carbonthread(carbonDumpThread); |
2704 | carbonthread.detach(); | |
2705 | ||
3c115e0f | 2706 | thread stattid(maintThread); |
886e2cf2 | 2707 | stattid.detach(); |
6d01c80c | 2708 | |
886e2cf2 RG |
2709 | thread healththread(healthChecksThread); |
2710 | ||
5d4e1ef8 RG |
2711 | if (!g_secPollSuffix.empty()) { |
2712 | thread secpollthread(secPollThread); | |
2713 | secpollthread.detach(); | |
2714 | } | |
2715 | ||
b82a127e | 2716 | if(g_cmdLine.beSupervised) { |
6ab65223 PL |
2717 | #ifdef HAVE_SYSTEMD |
2718 | sd_notify(0, "READY=1"); | |
2719 | #endif | |
886e2cf2 | 2720 | healththread.join(); |
773470ca | 2721 | } |
6d01c80c | 2722 | else { |
886e2cf2 | 2723 | healththread.detach(); |
505ca3d1 | 2724 | doConsole(); |
3c115e0f | 2725 | } |
9cf811d1 | 2726 | _exit(EXIT_SUCCESS); |
3c115e0f | 2727 | |
6d01c80c | 2728 | } |
3f5c3f1d PD |
2729 | catch(const LuaContext::ExecutionErrorException& e) { |
2730 | try { | |
2731 | errlog("Fatal Lua error: %s", e.what()); | |
2732 | std::rethrow_if_nested(e); | |
2010ac95 RG |
2733 | } catch(const std::exception& ne) { |
2734 | errlog("Details: %s", ne.what()); | |
3f5c3f1d PD |
2735 | } |
2736 | catch(PDNSException &ae) | |
2737 | { | |
2738 | errlog("Fatal pdns error: %s", ae.reason); | |
2739 | } | |
2740 | _exit(EXIT_FAILURE); | |
2741 | } | |
24d5cb00 | 2742 | catch(std::exception &e) |
2743 | { | |
6d01c80c | 2744 | errlog("Fatal error: %s", e.what()); |
4a966472 | 2745 | _exit(EXIT_FAILURE); |
24d5cb00 | 2746 | } |
3f81d239 | 2747 | catch(PDNSException &ae) |
7730131a | 2748 | { |
6d01c80c | 2749 | errlog("Fatal pdns error: %s", ae.reason); |
4a966472 | 2750 | _exit(EXIT_FAILURE); |
7730131a | 2751 | } |
eb0335ff MC |
2752 | |
2753 | uint64_t getLatencyCount(const std::string&) | |
2754 | { | |
2755 | return g_stats.responses + g_stats.selfAnswered + g_stats.cacheHits; | |
2756 | } |