]>
Commit | Line | Data |
---|---|---|
24d5cb00 | 1 | /* |
12471842 PL |
2 | * This file is part of PowerDNS or dnsdist. |
3 | * Copyright -- PowerDNS.COM B.V. and its contributors | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of version 2 of the GNU General Public License as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * In addition, for the avoidance of any doubt, permission is granted to | |
10 | * link this program with OpenSSL and to (re)distribute the binaries | |
11 | * produced as the result of such linking. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
5cc8371b RG |
22 | |
23 | #include "config.h" | |
24 | ||
25 | #include <fstream> | |
26 | #include <getopt.h> | |
27 | #include <grp.h> | |
2c5db3f7 | 28 | #include <limits> |
5cc8371b RG |
29 | #include <netinet/tcp.h> |
30 | #include <pwd.h> | |
31 | #include <sys/resource.h> | |
32 | #include <unistd.h> | |
424bdfb1 | 33 | |
4d39d7f3 | 34 | #if defined (__OpenBSD__) || defined(__NetBSD__) |
424bdfb1 PD |
35 | #include <readline/readline.h> |
36 | #else | |
37 | #include <editline/readline.h> | |
38 | #endif | |
39 | ||
6ab65223 PL |
40 | #ifdef HAVE_SYSTEMD |
41 | #include <systemd/sd-daemon.h> | |
42 | #endif | |
43 | ||
5cc8371b RG |
44 | #include "dnsdist.hh" |
45 | #include "dnsdist-cache.hh" | |
b5521206 | 46 | #include "dnsdist-console.hh" |
5cc8371b RG |
47 | #include "dnsdist-ecs.hh" |
48 | #include "dnsdist-lua.hh" | |
03b00917 | 49 | #include "dnsdist-rings.hh" |
5d4e1ef8 | 50 | #include "dnsdist-secpoll.hh" |
53c57da7 | 51 | #include "dnsdist-xpf.hh" |
5cc8371b RG |
52 | |
53 | #include "base64.hh" | |
54 | #include "delaypipe.hh" | |
55 | #include "dolog.hh" | |
56 | #include "dnsname.hh" | |
e0fd37ec | 57 | #include "dnsparser.hh" |
5cc8371b RG |
58 | #include "dnswriter.hh" |
59 | #include "ednsoptions.hh" | |
60 | #include "gettime.hh" | |
61 | #include "lock.hh" | |
62 | #include "misc.hh" | |
63 | #include "sodcrypto.hh" | |
64 | #include "sstuff.hh" | |
519f5484 | 65 | #include "threadname.hh" |
5cc8371b | 66 | |
a40df301 | 67 | /* Known sins: |
e48090d1 | 68 | |
d12cea01 | 69 | Receiver is currently single threaded |
e5a14b2b | 70 | not *that* bad actually, but now that we are thread safe, might want to scale |
a40df301 | 71 | */ |
24d5cb00 | 72 | |
0940e4eb | 73 | /* the Rulaction plan |
74 | Set of Rules, if one matches, it leads to an Action | |
75 | Both rules and actions could conceivably be Lua based. | |
76 | On the C++ side, both could be inherited from a class Rule and a class Action, | |
77 | on the Lua side we can't do that. */ | |
78 | ||
64e4ebb4 | 79 | using std::atomic; |
80 | using std::thread; | |
7730131a | 81 | bool g_verbose; |
b065f701 | 82 | |
e48090d1 | 83 | struct DNSDistStats g_stats; |
37a5c2d5 PO |
84 | MetricDefinitionStorage g_metricDefinitions; |
85 | ||
3b203c83 | 86 | uint16_t g_maxOutstanding{std::numeric_limits<uint16_t>::max()}; |
9e87dcb8 | 87 | bool g_verboseHealthChecks{false}; |
1ea747c0 | 88 | uint32_t g_staleCacheEntriesTTL{0}; |
bbfaaa6f | 89 | bool g_syslog{true}; |
0dffe9e3 | 90 | bool g_allowEmptyResponse{false}; |
cffde2fd | 91 | |
92 | GlobalStateHolder<NetmaskGroup> g_ACL; | |
c9262563 | 93 | string g_outputBuffer; |
a227f47d | 94 | |
a227f47d | 95 | std::vector<std::shared_ptr<TLSFrontend>> g_tlslocals; |
fbf14b03 | 96 | std::vector<std::shared_ptr<DOHFrontend>> g_dohlocals; |
6e9fd124 | 97 | std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals; |
87b515ed RG |
98 | #ifdef HAVE_EBPF |
99 | shared_ptr<BPFFilter> g_defaultBPFFilter; | |
8429ad04 | 100 | std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters; |
87b515ed | 101 | #endif /* HAVE_EBPF */ |
6e9fd124 | 102 | std::vector<std::unique_ptr<ClientState>> g_frontends; |
886e2cf2 | 103 | GlobalStateHolder<pools_t> g_pools; |
0beaa5c8 | 104 | size_t g_udpVectorSize{1}; |
7c0860e1 | 105 | |
9f4eb5cc RG |
106 | bool g_snmpEnabled{false}; |
107 | bool g_snmpTrapsEnabled{false}; | |
108 | DNSDistSNMPAgent* g_snmpAgent{nullptr}; | |
109 | ||
726ddf60 | 110 | /* UDP: the grand design. Per socket we listen on for incoming queries there is one thread. |
7730131a | 111 | Then we have a bunch of connected sockets for talking to downstream servers. |
112 | We send directly to those sockets. | |
24d5cb00 | 113 | |
7730131a | 114 | For the return path, per downstream server we have a thread that listens to responses. |
24d5cb00 | 115 | |
7730131a | 116 | Per socket there is an array of 2^16 states, when we send out a packet downstream, we note |
117 | there the original requestor and the original id. The new ID is the offset in the array. | |
118 | ||
119 | When an answer comes in on a socket, we look up the offset by the id, and lob it to the | |
120 | original requestor. | |
121 | ||
122 | IDs are assigned by atomic increments of the socket offset. | |
123 | */ | |
124 | ||
4d5959e6 RG |
125 | GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions; |
126 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions; | |
127 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions; | |
2d4783a8 | 128 | GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions; |
4d5959e6 | 129 | |
df111b53 | 130 | Rings g_rings; |
786e4d8c | 131 | QueryCount g_qcount; |
7c0860e1 | 132 | |
ecbe9133 | 133 | GlobalStateHolder<servers_t> g_dstates; |
78ffa782 | 134 | GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG; |
71c94675 | 135 | GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT; |
dd46e5e3 | 136 | DNSAction::Action g_dynBlockAction = DNSAction::Action::Drop; |
3f6d07a4 RG |
137 | int g_tcpRecvTimeout{2}; |
138 | int g_tcpSendTimeout{2}; | |
e0b5e49d | 139 | int g_udpTimeout{2}; |
3f6d07a4 | 140 | |
26a3cdb7 | 141 | bool g_servFailOnNoPolicy{false}; |
e72fbfc4 | 142 | bool g_truncateTC{false}; |
53c57da7 RG |
143 | bool g_fixupCase{false}; |
144 | bool g_preserveTrailingData{false}; | |
32b86928 | 145 | bool g_roundrobinFailOnNoServer{false}; |
0beaa5c8 | 146 | |
e0fd37ec | 147 | static void truncateTC(char* packet, uint16_t* len, size_t responseSize, unsigned int consumed) |
6ad8b29a | 148 | try |
149 | { | |
e0fd37ec RG |
150 | bool hadEDNS = false; |
151 | uint16_t payloadSize = 0; | |
152 | uint16_t z = 0; | |
153 | ||
154 | if (g_addEDNSToSelfGeneratedResponses) { | |
155 | hadEDNS = getEDNSUDPPayloadSizeAndZ(packet, *len, &payloadSize, &z); | |
156 | } | |
157 | ||
2aa2c0aa | 158 | *len=static_cast<uint16_t>(sizeof(dnsheader)+consumed+DNS_TYPE_SIZE+DNS_CLASS_SIZE); |
e7c732b8 RG |
159 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet); |
160 | dh->ancount = dh->arcount = dh->nscount = 0; | |
e0fd37ec RG |
161 | |
162 | if (hadEDNS) { | |
5e98ccfa | 163 | addEDNS(dh, *len, responseSize, z & EDNS_HEADER_FLAG_DO, payloadSize, 0); |
e0fd37ec | 164 | } |
6ad8b29a | 165 | } |
166 | catch(...) | |
167 | { | |
168 | g_stats.truncFail++; | |
169 | } | |
170 | ||
7b3865cd | 171 | struct DelayedPacket |
172 | { | |
173 | int fd; | |
174 | string packet; | |
175 | ComboAddress destination; | |
68f3eb4d | 176 | ComboAddress origDest; |
7b3865cd | 177 | void operator()() |
178 | { | |
20b019fa RG |
179 | ssize_t res; |
180 | if(origDest.sin4.sin_family == 0) { | |
181 | res = sendto(fd, packet.c_str(), packet.size(), 0, (struct sockaddr*)&destination, destination.getSocklen()); | |
182 | } | |
183 | else { | |
184 | res = sendfromto(fd, packet.c_str(), packet.size(), 0, origDest, destination); | |
185 | } | |
186 | if (res == -1) { | |
187 | int err = errno; | |
188 | vinfolog("Error sending delayed response to %s: %s", destination.toStringWithPort(), strerror(err)); | |
189 | } | |
7b3865cd | 190 | } |
191 | }; | |
192 | ||
3e425868 | 193 | DelayPipe<DelayedPacket>* g_delay = nullptr; |
7b3865cd | 194 | |
f653b8df | 195 | void doLatencyStats(double udiff) |
daacd477 | 196 | { |
cb167afd CHB |
197 | if(udiff < 1000) ++g_stats.latency0_1; |
198 | else if(udiff < 10000) ++g_stats.latency1_10; | |
199 | else if(udiff < 50000) ++g_stats.latency10_50; | |
200 | else if(udiff < 100000) ++g_stats.latency50_100; | |
201 | else if(udiff < 1000000) ++g_stats.latency100_1000; | |
202 | else ++g_stats.latencySlow; | |
eb0335ff | 203 | g_stats.latencySum += udiff / 1000; |
be6c318f | 204 | |
daacd477 | 205 | auto doAvg = [](double& var, double n, double weight) { |
206 | var = (weight -1) * var/weight + n/weight; | |
207 | }; | |
208 | ||
209 | doAvg(g_stats.latencyAvg100, udiff, 100); | |
210 | doAvg(g_stats.latencyAvg1000, udiff, 1000); | |
211 | doAvg(g_stats.latencyAvg10000, udiff, 10000); | |
212 | doAvg(g_stats.latencyAvg1000000, udiff, 1000000); | |
213 | } | |
ca404e94 | 214 | |
e7c732b8 | 215 | bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed) |
fcffc585 | 216 | { |
fcffc585 RG |
217 | if (responseLen < sizeof(dnsheader)) { |
218 | return false; | |
219 | } | |
220 | ||
3e425868 | 221 | const struct dnsheader* dh = reinterpret_cast<const struct dnsheader*>(response); |
c8c3d4e4 | 222 | if (dh->qdcount == 0) { |
ad7ec9a2 | 223 | if ((dh->rcode != RCode::NoError && dh->rcode != RCode::NXDomain) || g_allowEmptyResponse) { |
c8c3d4e4 RG |
224 | return true; |
225 | } | |
226 | else { | |
cb167afd | 227 | ++g_stats.nonCompliantResponses; |
c8c3d4e4 RG |
228 | return false; |
229 | } | |
230 | } | |
231 | ||
3e425868 RG |
232 | uint16_t rqtype, rqclass; |
233 | DNSName rqname; | |
fcffc585 RG |
234 | try { |
235 | rqname=DNSName(response, responseLen, sizeof(dnsheader), false, &rqtype, &rqclass, &consumed); | |
236 | } | |
3e425868 RG |
237 | catch(const std::exception& e) { |
238 | if(responseLen > 0 && static_cast<size_t>(responseLen) > sizeof(dnsheader)) { | |
fcffc585 | 239 | infolog("Backend %s sent us a response with id %d that did not parse: %s", remote.toStringWithPort(), ntohs(dh->id), e.what()); |
3e425868 | 240 | } |
cb167afd | 241 | ++g_stats.nonCompliantResponses; |
fcffc585 RG |
242 | return false; |
243 | } | |
244 | ||
245 | if (rqtype != qtype || rqclass != qclass || rqname != qname) { | |
246 | return false; | |
247 | } | |
248 | ||
249 | return true; | |
250 | } | |
251 | ||
4ab01344 | 252 | static void restoreFlags(struct dnsheader* dh, uint16_t origFlags) |
fcffc585 RG |
253 | { |
254 | static const uint16_t rdMask = 1 << FLAGS_RD_OFFSET; | |
255 | static const uint16_t cdMask = 1 << FLAGS_CD_OFFSET; | |
256 | static const uint16_t restoreFlagsMask = UINT16_MAX & ~(rdMask | cdMask); | |
0f72fd5c RG |
257 | uint16_t * flags = getFlagsFromDNSHeader(dh); |
258 | /* clear the flags we are about to restore */ | |
259 | *flags &= restoreFlagsMask; | |
260 | /* only keep the flags we want to restore */ | |
261 | origFlags &= ~restoreFlagsMask; | |
262 | /* set the saved flags as they were */ | |
263 | *flags |= origFlags; | |
264 | } | |
265 | ||
4ab01344 | 266 | static bool fixUpQueryTurnedResponse(DNSQuestion& dq, const uint16_t origFlags) |
e7c732b8 RG |
267 | { |
268 | restoreFlags(dq.dh, origFlags); | |
269 | ||
270 | return addEDNSToQueryTurnedResponse(dq); | |
271 | } | |
272 | ||
3e425868 | 273 | static bool fixUpResponse(char** response, uint16_t* responseLen, size_t* responseSize, const DNSName& qname, uint16_t origFlags, bool ednsAdded, bool ecsAdded, std::vector<uint8_t>& rewrittenResponse, uint16_t addRoom, bool* zeroScope) |
0f72fd5c | 274 | { |
fcffc585 RG |
275 | if (*responseLen < sizeof(dnsheader)) { |
276 | return false; | |
277 | } | |
278 | ||
3e425868 | 279 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(*response); |
c8c3d4e4 RG |
280 | restoreFlags(dh, origFlags); |
281 | ||
282 | if (*responseLen == sizeof(dnsheader)) { | |
283 | return true; | |
284 | } | |
285 | ||
fcffc585 RG |
286 | if(g_fixupCase) { |
287 | string realname = qname.toDNSString(); | |
288 | if (*responseLen >= (sizeof(dnsheader) + realname.length())) { | |
289 | memcpy(*response + sizeof(dnsheader), realname.c_str(), realname.length()); | |
290 | } | |
291 | } | |
292 | ||
ff73f02b | 293 | if (ednsAdded || ecsAdded) { |
ceae225c | 294 | uint16_t optStart; |
fcffc585 RG |
295 | size_t optLen = 0; |
296 | bool last = false; | |
297 | ||
11d5d247 RG |
298 | const std::string responseStr(*response, *responseLen); |
299 | int res = locateEDNSOptRR(responseStr, &optStart, &optLen, &last); | |
fcffc585 RG |
300 | |
301 | if (res == 0) { | |
49c33a6c RG |
302 | if (zeroScope) { // this finds if an EDNS Client Subnet scope was set, and if it is 0 |
303 | size_t optContentStart = 0; | |
304 | uint16_t optContentLen = 0; | |
305 | /* we need at least 4 bytes after the option length (family: 2, source prefix-length: 1, scope prefix-length: 1) */ | |
306 | if (isEDNSOptionInOpt(responseStr, optStart, optLen, EDNSOptionCode::ECS, &optContentStart, &optContentLen) && optContentLen >= 4) { | |
307 | /* see if the EDNS Client Subnet SCOPE PREFIX-LENGTH byte in position 3 is set to 0, which is the only thing | |
308 | we care about. */ | |
309 | *zeroScope = responseStr.at(optContentStart + 3) == 0; | |
310 | } | |
40669042 | 311 | } |
9837850d | 312 | |
ff73f02b RG |
313 | if (ednsAdded) { |
314 | /* we added the entire OPT RR, | |
315 | therefore we need to remove it entirely */ | |
316 | if (last) { | |
317 | /* simply remove the last AR */ | |
318 | *responseLen -= optLen; | |
319 | uint16_t arcount = ntohs(dh->arcount); | |
320 | arcount--; | |
321 | dh->arcount = htons(arcount); | |
322 | } | |
323 | else { | |
324 | /* Removing an intermediary RR could lead to compression error */ | |
11d5d247 | 325 | if (rewriteResponseWithoutEDNS(responseStr, rewrittenResponse) == 0) { |
ff73f02b RG |
326 | *responseLen = rewrittenResponse.size(); |
327 | if (addRoom && (UINT16_MAX - *responseLen) > addRoom) { | |
328 | rewrittenResponse.reserve(*responseLen + addRoom); | |
329 | } | |
330 | *responseSize = rewrittenResponse.capacity(); | |
331 | *response = reinterpret_cast<char*>(rewrittenResponse.data()); | |
332 | } | |
333 | else { | |
334 | warnlog("Error rewriting content"); | |
335 | } | |
336 | } | |
fcffc585 RG |
337 | } |
338 | else { | |
ff73f02b RG |
339 | /* the OPT RR was already present, but without ECS, |
340 | we need to remove the ECS option if any */ | |
341 | if (last) { | |
342 | /* nothing after the OPT RR, we can simply remove the | |
343 | ECS option */ | |
344 | size_t existingOptLen = optLen; | |
11d5d247 | 345 | removeEDNSOptionFromOPT(*response + optStart, &optLen, EDNSOptionCode::ECS); |
ff73f02b | 346 | *responseLen -= (existingOptLen - optLen); |
fcffc585 RG |
347 | } |
348 | else { | |
ff73f02b | 349 | /* Removing an intermediary RR could lead to compression error */ |
11d5d247 | 350 | if (rewriteResponseWithoutEDNSOption(responseStr, EDNSOptionCode::ECS, rewrittenResponse) == 0) { |
ff73f02b RG |
351 | *responseLen = rewrittenResponse.size(); |
352 | if (addRoom && (UINT16_MAX - *responseLen) > addRoom) { | |
353 | rewrittenResponse.reserve(*responseLen + addRoom); | |
354 | } | |
355 | *responseSize = rewrittenResponse.capacity(); | |
356 | *response = reinterpret_cast<char*>(rewrittenResponse.data()); | |
357 | } | |
358 | else { | |
359 | warnlog("Error rewriting content"); | |
360 | } | |
fcffc585 RG |
361 | } |
362 | } | |
363 | } | |
364 | } | |
365 | ||
366 | return true; | |
367 | } | |
368 | ||
fcffc585 | 369 | #ifdef HAVE_DNSCRYPT |
3e425868 | 370 | static bool encryptResponse(char* response, uint16_t* responseLen, size_t responseSize, bool tcp, std::shared_ptr<DNSCryptQuery> dnsCryptQuery, dnsheader** dh, dnsheader* dhCopy) |
fcffc585 | 371 | { |
0f72fd5c RG |
372 | if (dnsCryptQuery) { |
373 | uint16_t encryptedResponseLen = 0; | |
57847d65 RG |
374 | |
375 | /* save the original header before encrypting it in place */ | |
376 | if (dh != nullptr && *dh != nullptr && dhCopy != nullptr) { | |
377 | memcpy(dhCopy, *dh, sizeof(dnsheader)); | |
378 | *dh = dhCopy; | |
379 | } | |
380 | ||
43234e76 | 381 | int res = dnsCryptQuery->encryptResponse(response, *responseLen, responseSize, tcp, &encryptedResponseLen); |
fcffc585 | 382 | if (res == 0) { |
0f72fd5c | 383 | *responseLen = encryptedResponseLen; |
fcffc585 RG |
384 | } else { |
385 | /* dropping response */ | |
386 | vinfolog("Error encrypting the response, dropping."); | |
387 | return false; | |
388 | } | |
389 | } | |
0f72fd5c RG |
390 | return true; |
391 | } | |
7129b5c4 | 392 | #endif /* HAVE_DNSCRYPT */ |
fcffc585 | 393 | |
3e425868 RG |
394 | static bool applyRulesToResponse(LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr) |
395 | { | |
396 | DNSResponseAction::Action action=DNSResponseAction::Action::None; | |
397 | std::string ruleresult; | |
398 | for(const auto& lr : *localRespRulactions) { | |
399 | if(lr.d_rule->matches(&dr)) { | |
400 | lr.d_rule->d_matches++; | |
401 | action=(*lr.d_action)(&dr, &ruleresult); | |
402 | switch(action) { | |
403 | case DNSResponseAction::Action::Allow: | |
404 | return true; | |
405 | break; | |
406 | case DNSResponseAction::Action::Drop: | |
407 | return false; | |
408 | break; | |
409 | case DNSResponseAction::Action::HeaderModify: | |
410 | return true; | |
411 | break; | |
412 | case DNSResponseAction::Action::ServFail: | |
413 | dr.dh->rcode = RCode::ServFail; | |
414 | return true; | |
415 | break; | |
416 | /* non-terminal actions follow */ | |
417 | case DNSResponseAction::Action::Delay: | |
418 | dr.delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry | |
419 | break; | |
420 | case DNSResponseAction::Action::None: | |
421 | break; | |
422 | } | |
423 | } | |
424 | } | |
425 | ||
426 | return true; | |
427 | } | |
428 | ||
429 | bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted) | |
430 | { | |
431 | if (!applyRulesToResponse(localRespRulactions, dr)) { | |
432 | return false; | |
433 | } | |
434 | ||
435 | bool zeroScope = false; | |
436 | if (!fixUpResponse(response, responseLen, responseSize, *dr.qname, dr.origFlags, dr.ednsAdded, dr.ecsAdded, rewrittenResponse, addRoom, dr.useZeroScope ? &zeroScope : nullptr)) { | |
437 | return false; | |
438 | } | |
439 | ||
440 | if (dr.packetCache && !dr.skipCache) { | |
441 | if (!dr.useZeroScope) { | |
442 | /* if the query was not suitable for zero-scope, for | |
443 | example because it had an existing ECS entry so the hash is | |
444 | not really 'no ECS', so just insert it for the existing subnet | |
445 | since: | |
446 | - we don't have the correct hash for a non-ECS query | |
447 | - inserting with hash computed before the ECS replacement but with | |
448 | the subnet extracted _after_ the replacement would not work. | |
449 | */ | |
450 | zeroScope = false; | |
451 | } | |
452 | // if zeroScope, pass the pre-ECS hash-key and do not pass the subnet to the cache | |
453 | dr.packetCache->insert(zeroScope ? dr.cacheKeyNoECS : dr.cacheKey, zeroScope ? boost::none : dr.subnet, dr.origFlags, dr.dnssecOK, *dr.qname, dr.qtype, dr.qclass, *response, *responseLen, dr.tcp, dr.dh->rcode, dr.tempFailureTTL); | |
454 | } | |
455 | ||
456 | #ifdef HAVE_DNSCRYPT | |
457 | if (!muted) { | |
458 | if (!encryptResponse(*response, responseLen, *responseSize, dr.tcp, dr.dnsCryptQuery, nullptr, nullptr)) { | |
459 | return false; | |
460 | } | |
461 | } | |
7129b5c4 | 462 | #endif /* HAVE_DNSCRYPT */ |
3e425868 RG |
463 | |
464 | return true; | |
465 | } | |
466 | ||
467 | static bool sendUDPResponse(int origFD, const char* response, const uint16_t responseLen, const int delayMsec, const ComboAddress& origDest, const ComboAddress& origRemote) | |
0f72fd5c | 468 | { |
fcffc585 RG |
469 | if(delayMsec && g_delay) { |
470 | DelayedPacket dp{origFD, string(response,responseLen), origRemote, origDest}; | |
471 | g_delay->submit(dp, delayMsec); | |
472 | } | |
473 | else { | |
20b019fa RG |
474 | ssize_t res; |
475 | if(origDest.sin4.sin_family == 0) { | |
3e425868 | 476 | res = sendto(origFD, response, responseLen, 0, reinterpret_cast<const struct sockaddr*>(&origRemote), origRemote.getSocklen()); |
20b019fa RG |
477 | } |
478 | else { | |
479 | res = sendfromto(origFD, response, responseLen, 0, origDest, origRemote); | |
480 | } | |
481 | if (res == -1) { | |
482 | int err = errno; | |
483 | vinfolog("Error sending response to %s: %s", origRemote.toStringWithPort(), strerror(err)); | |
484 | } | |
fcffc585 RG |
485 | } |
486 | ||
487 | return true; | |
488 | } | |
489 | ||
150105a2 | 490 | |
fbf14b03 | 491 | int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state) |
150105a2 | 492 | { |
5bdbb83d | 493 | return state->sockets[state->socketsOffset++ % state->sockets.size()]; |
150105a2 RG |
494 | } |
495 | ||
5bdbb83d | 496 | static void pickBackendSocketsReadyForReceiving(const std::shared_ptr<DownstreamState>& state, std::vector<int>& ready) |
150105a2 | 497 | { |
5bdbb83d | 498 | ready.clear(); |
150105a2 | 499 | |
5bdbb83d RG |
500 | if (state->sockets.size() == 1) { |
501 | ready.push_back(state->sockets[0]); | |
502 | return ; | |
150105a2 RG |
503 | } |
504 | ||
5bdbb83d RG |
505 | { |
506 | std::lock_guard<std::mutex> lock(state->socketsLock); | |
507 | state->mplexer->getAvailableFDs(ready, -1); | |
150105a2 | 508 | } |
150105a2 RG |
509 | } |
510 | ||
4632045b | 511 | // listens on a dedicated socket, lobs answers from downstream servers to original requestors |
9b73b71c | 512 | void responderThread(std::shared_ptr<DownstreamState> dss) |
66d1c0bf | 513 | try { |
519f5484 | 514 | setThreadName("dnsdist/respond"); |
d8c19b98 | 515 | auto localRespRulactions = g_resprulactions.getLocal(); |
11e1e08b | 516 | char packet[4096 + DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE]; |
b50ee135 | 517 | static_assert(sizeof(packet) <= UINT16_MAX, "Packet size should fit in a uint16_t"); |
3e425868 RG |
518 | /* when the answer is encrypted in place, we need to get a copy |
519 | of the original header before encryption to fill the ring buffer */ | |
520 | dnsheader cleartextDH; | |
ca404e94 | 521 | vector<uint8_t> rewrittenResponse; |
7267213a | 522 | |
66d1c0bf | 523 | uint16_t queryId = 0; |
5bdbb83d RG |
524 | std::vector<int> sockets; |
525 | sockets.reserve(dss->sockets.size()); | |
526 | ||
7730131a | 527 | for(;;) { |
57847d65 | 528 | dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet); |
66d1c0bf | 529 | try { |
5bdbb83d RG |
530 | pickBackendSocketsReadyForReceiving(dss, sockets); |
531 | for (const auto& fd : sockets) { | |
532 | ssize_t got = recv(fd, packet, sizeof(packet), 0); | |
533 | char * response = packet; | |
534 | size_t responseSize = sizeof(packet); | |
ca404e94 | 535 | |
2aa2c0aa | 536 | if (got < 0 || static_cast<size_t>(got) < sizeof(dnsheader)) |
5bdbb83d | 537 | continue; |
4f380af3 | 538 | |
3e425868 | 539 | uint16_t responseLen = static_cast<uint16_t>(got); |
5bdbb83d | 540 | queryId = dh->id; |
b50ee135 | 541 | |
fbf14b03 | 542 | if(queryId >= dss->idStates.size()) { |
5bdbb83d | 543 | continue; |
fbf14b03 | 544 | } |
4f380af3 | 545 | |
5bdbb83d | 546 | IDState* ids = &dss->idStates[queryId]; |
a9489723 | 547 | int64_t usageIndicator = ids->usageIndicator; |
c9ba8478 | 548 | |
311f19d5 | 549 | if(!IDState::isInUse(usageIndicator)) { |
9bd1a882 | 550 | /* the corresponding state is marked as not in use, meaning that: |
a9489723 | 551 | - it was already cleaned up by another thread and the state is gone ; |
9bd1a882 RG |
552 | - we already got a response for this query and this one is a duplicate. |
553 | Either way, we don't touch it. | |
554 | */ | |
5bdbb83d | 555 | continue; |
9bd1a882 | 556 | } |
7267213a | 557 | |
9bd1a882 | 558 | /* read the potential DOHUnit state as soon as possible, but don't use it |
a9489723 | 559 | until we have confirmed that we own this state by updating usageIndicator */ |
0956c5c5 | 560 | auto du = ids->du; |
5bdbb83d RG |
561 | /* setting age to 0 to prevent the maintainer thread from |
562 | cleaning this IDS while we process the response. | |
5bdbb83d RG |
563 | */ |
564 | ids->age = 0; | |
a9489723 | 565 | int origFD = ids->origFD; |
fcffc585 | 566 | |
e7c732b8 RG |
567 | unsigned int consumed = 0; |
568 | if (!responseContentMatches(response, responseLen, ids->qname, ids->qtype, ids->qclass, dss->remote, consumed)) { | |
5bdbb83d RG |
569 | continue; |
570 | } | |
7267213a | 571 | |
3c72bc54 | 572 | bool isDoH = du != nullptr; |
a9489723 RG |
573 | /* atomically mark the state as available, but only if it has not been altered |
574 | in the meantime */ | |
311f19d5 | 575 | if (ids->tryMarkUnused(usageIndicator)) { |
9bd1a882 RG |
576 | /* clear the potential DOHUnit asap, it's ours now |
577 | and since we just marked the state as unused, | |
578 | someone could overwrite it. */ | |
246ff31f | 579 | ids->du = nullptr; |
71b86bd8 RG |
580 | /* we only decrement the outstanding counter if the value was not |
581 | altered in the meantime, which would mean that the state has been actively reused | |
582 | and the other thread has not incremented the outstanding counter, so we don't | |
583 | want it to be decremented twice. */ | |
584 | --dss->outstanding; // you'd think an attacker could game this, but we're using connected socket | |
0956c5c5 | 585 | } else { |
9bd1a882 | 586 | /* someone updated the state in the meantime, we can't touch the existing pointer */ |
0956c5c5 | 587 | du = nullptr; |
f2d0c808 RG |
588 | /* since the state has been updated, we can't safely access it so let's just drop |
589 | this response */ | |
590 | continue; | |
71b86bd8 | 591 | } |
2c5db3f7 | 592 | |
5bdbb83d | 593 | if(dh->tc && g_truncateTC) { |
e0fd37ec | 594 | truncateTC(response, &responseLen, responseSize, consumed); |
5bdbb83d | 595 | } |
aeb36780 | 596 | |
5bdbb83d | 597 | dh->id = ids->origID; |
ca404e94 | 598 | |
5bdbb83d | 599 | uint16_t addRoom = 0; |
d0ae6360 RG |
600 | DNSResponse dr = makeDNSResponseFromIDState(*ids, dh, sizeof(packet), responseLen, false); |
601 | if (dr.dnsCryptQuery) { | |
5bdbb83d RG |
602 | addRoom = DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE; |
603 | } | |
ca404e94 | 604 | |
3e425868 RG |
605 | memcpy(&cleartextDH, dr.dh, sizeof(cleartextDH)); |
606 | if (!processResponse(&response, &responseLen, &responseSize, localRespRulactions, dr, addRoom, rewrittenResponse, ids->cs && ids->cs->muted)) { | |
607 | continue; | |
5bdbb83d | 608 | } |
886e2cf2 | 609 | |
5bdbb83d | 610 | if (ids->cs && !ids->cs->muted) { |
0956c5c5 | 611 | if (du) { |
fbf14b03 RG |
612 | #ifdef HAVE_DNS_OVER_HTTPS |
613 | // DoH query | |
246ff31f | 614 | du->response = std::string(response, responseLen); |
0956c5c5 | 615 | if (send(du->rsock, &du, sizeof(du), 0) != sizeof(du)) { |
9bd1a882 RG |
616 | /* at this point we have the only remaining pointer on this |
617 | DOHUnit object since we did set ids->du to nullptr earlier */ | |
0956c5c5 | 618 | delete du; |
fbf14b03 RG |
619 | } |
620 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
0956c5c5 | 621 | du = nullptr; |
fbf14b03 RG |
622 | } |
623 | else { | |
624 | ComboAddress empty; | |
625 | empty.sin4.sin_family = 0; | |
626 | /* if ids->destHarvested is false, origDest holds the listening address. | |
627 | We don't want to use that as a source since it could be 0.0.0.0 for example. */ | |
628 | sendUDPResponse(origFD, response, responseLen, dr.delayMsec, ids->destHarvested ? ids->origDest : empty, ids->origRemote); | |
629 | } | |
5bdbb83d | 630 | } |
66d1c0bf | 631 | |
cb167afd | 632 | ++g_stats.responses; |
66d1c0bf | 633 | |
5bdbb83d | 634 | double udiff = ids->sentTime.udiff(); |
fbf14b03 | 635 | vinfolog("Got answer from %s, relayed to %s%s, took %f usec", dss->remote.toStringWithPort(), ids->origRemote.toStringWithPort(), |
3c72bc54 | 636 | isDoH ? " (https)": "", udiff); |
66d1c0bf | 637 | |
01f6920b RG |
638 | struct timespec ts; |
639 | gettime(&ts); | |
3e425868 | 640 | g_rings.insertResponse(ts, *dr.remote, *dr.qname, dr.qtype, static_cast<unsigned int>(udiff), static_cast<unsigned int>(got), cleartextDH, dss->remote); |
66d1c0bf | 641 | |
61d10a4d MH |
642 | switch (dh->rcode) { |
643 | case RCode::NXDomain: | |
644 | ++g_stats.frontendNXDomain; | |
645 | break; | |
646 | case RCode::ServFail: | |
cb167afd | 647 | ++g_stats.servfailResponses; |
61d10a4d MH |
648 | ++g_stats.frontendServFail; |
649 | break; | |
650 | case RCode::NoError: | |
651 | ++g_stats.frontendNoError; | |
652 | break; | |
cb167afd | 653 | } |
5bdbb83d | 654 | dss->latencyUsec = (127.0 * dss->latencyUsec / 128.0) + udiff/128.0; |
66d1c0bf | 655 | |
5bdbb83d | 656 | doLatencyStats(udiff); |
fcadd56e | 657 | |
5bdbb83d RG |
658 | rewrittenResponse.clear(); |
659 | } | |
66d1c0bf | 660 | } |
df560083 | 661 | catch(const std::exception& e){ |
cd7fa253 | 662 | vinfolog("Got an error in UDP responder thread while parsing a response from %s, id %d: %s", dss->remote.toStringWithPort(), queryId, e.what()); |
66d1c0bf | 663 | } |
7730131a | 664 | } |
7730131a | 665 | } |
66d1c0bf RG |
666 | catch(const std::exception& e) |
667 | { | |
668 | errlog("UDP responder thread died because of exception: %s", e.what()); | |
66d1c0bf RG |
669 | } |
670 | catch(const PDNSException& e) | |
671 | { | |
672 | errlog("UDP responder thread died because of PowerDNS exception: %s", e.reason); | |
66d1c0bf RG |
673 | } |
674 | catch(...) | |
675 | { | |
676 | errlog("UDP responder thread died because of an exception: %s", "unknown"); | |
66d1c0bf | 677 | } |
24d5cb00 | 678 | |
5d7e6765 | 679 | bool DownstreamState::reconnect() |
3c115e0f | 680 | { |
5d7e6765 RG |
681 | std::unique_lock<std::mutex> tl(connectLock, std::try_to_lock); |
682 | if (!tl.owns_lock()) { | |
683 | /* we are already reconnecting */ | |
684 | return false; | |
685 | } | |
686 | ||
b58f08e5 | 687 | connected = false; |
5bdbb83d | 688 | for (auto& fd : sockets) { |
150105a2 | 689 | if (fd != -1) { |
5d7e6765 | 690 | if (sockets.size() > 1) { |
5bdbb83d RG |
691 | std::lock_guard<std::mutex> lock(socketsLock); |
692 | mplexer->removeReadFD(fd); | |
693 | } | |
150105a2 RG |
694 | /* shutdown() is needed to wake up recv() in the responderThread */ |
695 | shutdown(fd, SHUT_RDWR); | |
696 | close(fd); | |
697 | fd = -1; | |
f99e3aaf | 698 | } |
150105a2 RG |
699 | if (!IsAnyAddress(remote)) { |
700 | fd = SSocket(remote.sin4.sin_family, SOCK_DGRAM, 0); | |
701 | if (!IsAnyAddress(sourceAddr)) { | |
702 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); | |
703 | SBind(fd, sourceAddr); | |
704 | } | |
705 | try { | |
706 | SConnect(fd, remote); | |
5d7e6765 | 707 | if (sockets.size() > 1) { |
5bdbb83d RG |
708 | std::lock_guard<std::mutex> lock(socketsLock); |
709 | mplexer->addReadFD(fd, [](int, boost::any) {}); | |
710 | } | |
150105a2 RG |
711 | connected = true; |
712 | } | |
713 | catch(const std::runtime_error& error) { | |
714 | infolog("Error connecting to new server with address %s: %s", remote.toStringWithPort(), error.what()); | |
38069e7e RG |
715 | connected = false; |
716 | break; | |
717 | } | |
7565f4e6 | 718 | } |
38069e7e RG |
719 | } |
720 | ||
721 | /* if at least one (re-)connection failed, close all sockets */ | |
722 | if (!connected) { | |
5bdbb83d | 723 | for (auto& fd : sockets) { |
38069e7e | 724 | if (fd != -1) { |
5d7e6765 RG |
725 | if (sockets.size() > 1) { |
726 | std::lock_guard<std::mutex> lock(socketsLock); | |
727 | mplexer->removeReadFD(fd); | |
728 | } | |
38069e7e RG |
729 | /* shutdown() is needed to wake up recv() in the responderThread */ |
730 | shutdown(fd, SHUT_RDWR); | |
731 | close(fd); | |
732 | fd = -1; | |
150105a2 | 733 | } |
7565f4e6 | 734 | } |
b58f08e5 | 735 | } |
5d7e6765 RG |
736 | |
737 | return connected; | |
b58f08e5 | 738 | } |
f2caf657 CHB |
739 | void DownstreamState::hash() |
740 | { | |
d58e616a | 741 | vinfolog("Computing hashes for id=%s and weight=%d", id, weight); |
f2caf657 | 742 | auto w = weight; |
d58e616a | 743 | WriteLock wl(&d_lock); |
f2caf657 CHB |
744 | hashes.clear(); |
745 | while (w > 0) { | |
746 | std::string uuid = boost::str(boost::format("%s-%d") % id % w); | |
747 | unsigned int wshash = burtleCI((const unsigned char*)uuid.c_str(), uuid.size(), g_hashperturb); | |
748 | hashes.insert(wshash); | |
749 | --w; | |
750 | } | |
751 | } | |
752 | ||
753 | void DownstreamState::setId(const boost::uuids::uuid& newId) | |
754 | { | |
755 | id = newId; | |
d58e616a CHB |
756 | // compute hashes only if already done |
757 | if (!hashes.empty()) { | |
758 | hash(); | |
759 | } | |
f2caf657 CHB |
760 | } |
761 | ||
762 | void DownstreamState::setWeight(int newWeight) | |
763 | { | |
5a2bbe8c CHB |
764 | if (newWeight < 1) { |
765 | errlog("Error setting server's weight: downstream weight value must be greater than 0."); | |
766 | return ; | |
767 | } | |
f2caf657 | 768 | weight = newWeight; |
d58e616a CHB |
769 | if (!hashes.empty()) { |
770 | hash(); | |
771 | } | |
f2caf657 | 772 | } |
b58f08e5 | 773 | |
150105a2 | 774 | DownstreamState::DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf_, size_t numberOfSockets): remote(remote_), sourceAddr(sourceAddr_), sourceItf(sourceItf_) |
b58f08e5 | 775 | { |
d58e616a | 776 | pthread_rwlock_init(&d_lock, nullptr); |
d61aa945 | 777 | id = getUniqueID(); |
5d7e6765 RG |
778 | threadStarted.clear(); |
779 | ||
5bdbb83d RG |
780 | mplexer = std::unique_ptr<FDMultiplexer>(FDMultiplexer::getMultiplexerSilent()); |
781 | ||
782 | sockets.resize(numberOfSockets); | |
783 | for (auto& fd : sockets) { | |
150105a2 RG |
784 | fd = -1; |
785 | } | |
786 | ||
b58f08e5 RG |
787 | if (!IsAnyAddress(remote)) { |
788 | reconnect(); | |
f99e3aaf RG |
789 | idStates.resize(g_maxOutstanding); |
790 | sw.start(); | |
791 | infolog("Added downstream server %s", remote.toStringWithPort()); | |
fbe2a2e0 | 792 | } |
1720247e | 793 | |
3c115e0f | 794 | } |
795 | ||
773470ca | 796 | std::mutex g_luamutex; |
3f25bce6 | 797 | LuaContext g_lua; |
3f25bce6 | 798 | |
ecbe9133 | 799 | GlobalStateHolder<ServerPolicy> g_policy; |
773470ca | 800 | |
497a6e3a | 801 | shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq) |
773470ca | 802 | { |
22b2b326 | 803 | for(auto& d : servers) { |
da4e7813 | 804 | if(d.second->isUp() && d.second->qps.check()) |
805 | return d.second; | |
773470ca | 806 | } |
497a6e3a | 807 | return leastOutstanding(servers, dq); |
2c5db3f7 | 808 | } |
809 | ||
d1fba58e | 810 | // get server with least outstanding queries, and within those, with the lowest order, and within those: the fastest |
497a6e3a | 811 | shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq) |
c9262563 | 812 | { |
886e2cf2 RG |
813 | if (servers.size() == 1 && servers[0].second->isUp()) { |
814 | return servers[0].second; | |
815 | } | |
816 | ||
d1fba58e | 817 | vector<pair<tuple<int,int,double>, shared_ptr<DownstreamState>>> poss; |
818 | /* so you might wonder, why do we go through this trouble? The data on which we sort could change during the sort, | |
819 | which would suck royally and could even lead to crashes. So first we snapshot on what we sort, and then we sort */ | |
478ce172 | 820 | poss.reserve(servers.size()); |
0e41337b | 821 | for(auto& d : servers) { |
da4e7813 | 822 | if(d.second->isUp()) { |
d1fba58e | 823 | poss.push_back({make_tuple(d.second->outstanding.load(), d.second->order, d.second->latencyUsec), d.second}); |
c9262563 | 824 | } |
825 | } | |
826 | if(poss.empty()) | |
827 | return shared_ptr<DownstreamState>(); | |
828 | nth_element(poss.begin(), poss.begin(), poss.end(), [](const decltype(poss)::value_type& a, const decltype(poss)::value_type& b) { return a.first < b.first; }); | |
829 | return poss.begin()->second; | |
830 | } | |
831 | ||
497a6e3a | 832 | shared_ptr<DownstreamState> valrandom(unsigned int val, const NumberedServerVector& servers, const DNSQuestion* dq) |
c9262563 | 833 | { |
834 | vector<pair<int, shared_ptr<DownstreamState>>> poss; | |
278403d3 DM |
835 | int sum = 0; |
836 | int max = std::numeric_limits<int>::max(); | |
837 | ||
22b2b326 | 838 | for(auto& d : servers) { // w=1, w=10 -> 1, 11 |
da4e7813 | 839 | if(d.second->isUp()) { |
278403d3 DM |
840 | // Don't overflow sum when adding high weights |
841 | if(d.second->weight > max - sum) { | |
842 | sum = max; | |
843 | } else { | |
844 | sum += d.second->weight; | |
845 | } | |
846 | ||
da4e7813 | 847 | poss.push_back({sum, d.second}); |
c9262563 | 848 | } |
849 | } | |
d2894425 NJ |
850 | |
851 | // Catch poss & sum are empty to avoid SIGFPE | |
852 | if(poss.empty()) | |
853 | return shared_ptr<DownstreamState>(); | |
854 | ||
a7f3108c | 855 | int r = val % sum; |
af619119 | 856 | auto p = upper_bound(poss.begin(), poss.end(),r, [](int r_, const decltype(poss)::value_type& a) { return r_ < a.first;}); |
c9262563 | 857 | if(p==poss.end()) |
858 | return shared_ptr<DownstreamState>(); | |
859 | return p->second; | |
860 | } | |
861 | ||
497a6e3a | 862 | shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq) |
a7f3108c | 863 | { |
497a6e3a | 864 | return valrandom(random(), servers, dq); |
a7f3108c | 865 | } |
866 | ||
36e763fa | 867 | uint32_t g_hashperturb; |
497a6e3a | 868 | shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq) |
a7f3108c | 869 | { |
497a6e3a | 870 | return valrandom(dq->qname->hash(g_hashperturb), servers, dq); |
a7f3108c | 871 | } |
872 | ||
1720247e CHB |
873 | shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq) |
874 | { | |
1720247e | 875 | unsigned int qhash = dq->qname->hash(g_hashperturb); |
b712c51e CHB |
876 | unsigned int sel = std::numeric_limits<unsigned int>::max(); |
877 | unsigned int min = std::numeric_limits<unsigned int>::max(); | |
878 | shared_ptr<DownstreamState> ret = nullptr, first = nullptr; | |
1720247e CHB |
879 | |
880 | for (const auto& d: servers) { | |
881 | if (d.second->isUp()) { | |
93ca495f | 882 | // make sure hashes have been computed |
d58e616a CHB |
883 | if (d.second->hashes.empty()) { |
884 | d.second->hash(); | |
885 | } | |
886 | { | |
887 | ReadLock rl(&(d.second->d_lock)); | |
93ca495f CHB |
888 | const auto& server = d.second; |
889 | // we want to keep track of the last hash | |
b712c51e CHB |
890 | if (min > *(server->hashes.begin())) { |
891 | min = *(server->hashes.begin()); | |
892 | first = server; | |
93ca495f | 893 | } |
b712c51e CHB |
894 | |
895 | auto hash_it = server->hashes.lower_bound(qhash); | |
896 | if (hash_it != server->hashes.end()) { | |
897 | if (*hash_it < sel) { | |
93ca495f CHB |
898 | sel = *hash_it; |
899 | ret = server; | |
900 | } | |
d58e616a | 901 | } |
1720247e CHB |
902 | } |
903 | } | |
904 | } | |
93ca495f CHB |
905 | if (ret != nullptr) { |
906 | return ret; | |
1720247e | 907 | } |
b712c51e CHB |
908 | if (first != nullptr) { |
909 | return first; | |
1720247e | 910 | } |
93ca495f | 911 | return shared_ptr<DownstreamState>(); |
1720247e | 912 | } |
a7f3108c | 913 | |
497a6e3a | 914 | shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq) |
5f504638 | 915 | { |
da4e7813 | 916 | NumberedServerVector poss; |
c9262563 | 917 | |
22b2b326 | 918 | for(auto& d : servers) { |
da4e7813 | 919 | if(d.second->isUp()) { |
5f504638 | 920 | poss.push_back(d); |
c9262563 | 921 | } |
5f504638 | 922 | } |
c9262563 | 923 | |
ecbe9133 | 924 | const auto *res=&poss; |
32b86928 | 925 | if(poss.empty() && !g_roundrobinFailOnNoServer) |
ecbe9133 | 926 | res = &servers; |
c9262563 | 927 | |
928 | if(res->empty()) | |
929 | return shared_ptr<DownstreamState>(); | |
930 | ||
931 | static unsigned int counter; | |
932 | ||
da4e7813 | 933 | return (*res)[(counter++) % res->size()].second; |
5f504638 | 934 | } |
935 | ||
e6841c9e | 936 | ComboAddress g_serverControl{"127.0.0.1:5199"}; |
b076b34a | 937 | |
886e2cf2 RG |
938 | std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName) |
939 | { | |
940 | std::shared_ptr<ServerPool> pool; | |
941 | pools_t::iterator it = pools.find(poolName); | |
942 | if (it != pools.end()) { | |
943 | pool = it->second; | |
944 | } | |
945 | else { | |
8f4f5186 RG |
946 | if (!poolName.empty()) |
947 | vinfolog("Creating pool %s", poolName); | |
886e2cf2 RG |
948 | pool = std::make_shared<ServerPool>(); |
949 | pools.insert(std::pair<std::string,std::shared_ptr<ServerPool> >(poolName, pool)); | |
950 | } | |
951 | return pool; | |
952 | } | |
78c8047b | 953 | |
742c079a RG |
954 | void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy) |
955 | { | |
956 | std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName); | |
957 | if (!poolName.empty()) { | |
958 | vinfolog("Setting pool %s server selection policy to %s", poolName, policy->name); | |
959 | } else { | |
960 | vinfolog("Setting default pool server selection policy to %s", policy->name); | |
961 | } | |
962 | pool->policy = policy; | |
963 | } | |
964 | ||
886e2cf2 | 965 | void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server) |
22b2b326 | 966 | { |
886e2cf2 | 967 | std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName); |
c218b223 | 968 | if (!poolName.empty()) { |
8f4f5186 | 969 | vinfolog("Adding server to pool %s", poolName); |
c218b223 | 970 | } else { |
8f4f5186 | 971 | vinfolog("Adding server to default pool"); |
c218b223 | 972 | } |
a1b1a29d | 973 | pool->addServer(server); |
886e2cf2 RG |
974 | } |
975 | ||
976 | void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server) | |
977 | { | |
8f4f5186 | 978 | std::shared_ptr<ServerPool> pool = getPool(pools, poolName); |
886e2cf2 | 979 | |
c218b223 | 980 | if (!poolName.empty()) { |
8f4f5186 | 981 | vinfolog("Removing server from pool %s", poolName); |
c218b223 | 982 | } |
983 | else { | |
8f4f5186 | 984 | vinfolog("Removing server from default pool"); |
c218b223 | 985 | } |
886e2cf2 | 986 | |
a1b1a29d | 987 | pool->removeServer(server); |
886e2cf2 RG |
988 | } |
989 | ||
990 | std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName) | |
991 | { | |
992 | pools_t::const_iterator it = pools.find(poolName); | |
993 | ||
994 | if (it == pools.end()) { | |
995 | throw std::out_of_range("No pool named " + poolName); | |
996 | } | |
997 | ||
998 | return it->second; | |
999 | } | |
1000 | ||
a1b1a29d | 1001 | NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName) |
886e2cf2 RG |
1002 | { |
1003 | std::shared_ptr<ServerPool> pool = getPool(pools, poolName); | |
a1b1a29d | 1004 | return pool->getServers(); |
22b2b326 | 1005 | } |
c9262563 | 1006 | |
614239d0 | 1007 | static void spoofResponseFromString(DNSQuestion& dq, const string& spoofContent) |
7791f83a RG |
1008 | { |
1009 | string result; | |
614239d0 RG |
1010 | |
1011 | std::vector<std::string> addrs; | |
1012 | stringtok(addrs, spoofContent, " ,"); | |
1013 | ||
1014 | if (addrs.size() == 1) { | |
1015 | try { | |
1016 | ComboAddress spoofAddr(spoofContent); | |
1017 | SpoofAction sa({spoofAddr}); | |
1018 | sa(&dq, &result); | |
1019 | } | |
1020 | catch(const PDNSException &e) { | |
1021 | SpoofAction sa(spoofContent); // CNAME then | |
1022 | sa(&dq, &result); | |
1023 | } | |
1024 | } else { | |
1025 | std::vector<ComboAddress> cas; | |
1026 | for (const auto& addr : addrs) { | |
1027 | try { | |
1028 | cas.push_back(ComboAddress(addr)); | |
1029 | } | |
1030 | catch (...) { | |
1031 | } | |
1032 | } | |
1033 | SpoofAction sa(cas); | |
6ca7a40a | 1034 | sa(&dq, &result); |
7791f83a RG |
1035 | } |
1036 | } | |
1037 | ||
2a28db86 RG |
1038 | bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop) |
1039 | { | |
1040 | switch(action) { | |
1041 | case DNSAction::Action::Allow: | |
1042 | return true; | |
1043 | break; | |
1044 | case DNSAction::Action::Drop: | |
1045 | ++g_stats.ruleDrop; | |
1046 | drop = true; | |
1047 | return true; | |
1048 | break; | |
1049 | case DNSAction::Action::Nxdomain: | |
1050 | dq.dh->rcode = RCode::NXDomain; | |
1051 | dq.dh->qr=true; | |
1052 | ++g_stats.ruleNXDomain; | |
1053 | return true; | |
1054 | break; | |
1055 | case DNSAction::Action::Refused: | |
1056 | dq.dh->rcode = RCode::Refused; | |
1057 | dq.dh->qr=true; | |
1058 | ++g_stats.ruleRefused; | |
1059 | return true; | |
1060 | break; | |
1061 | case DNSAction::Action::ServFail: | |
1062 | dq.dh->rcode = RCode::ServFail; | |
1063 | dq.dh->qr=true; | |
1064 | ++g_stats.ruleServFail; | |
1065 | return true; | |
1066 | break; | |
1067 | case DNSAction::Action::Spoof: | |
1068 | spoofResponseFromString(dq, ruleresult); | |
1069 | return true; | |
1070 | break; | |
1071 | case DNSAction::Action::Truncate: | |
1072 | dq.dh->tc = true; | |
1073 | dq.dh->qr = true; | |
1074 | return true; | |
1075 | break; | |
1076 | case DNSAction::Action::HeaderModify: | |
1077 | return true; | |
1078 | break; | |
1079 | case DNSAction::Action::Pool: | |
1080 | dq.poolname=ruleresult; | |
1081 | return true; | |
1082 | break; | |
1083 | case DNSAction::Action::NoRecurse: | |
1084 | dq.dh->rd = false; | |
1085 | return true; | |
1086 | break; | |
1087 | /* non-terminal actions follow */ | |
1088 | case DNSAction::Action::Delay: | |
1089 | dq.delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry | |
1090 | break; | |
1091 | case DNSAction::Action::None: | |
1092 | /* fall-through */ | |
1093 | case DNSAction::Action::NoOp: | |
1094 | break; | |
1095 | } | |
1096 | ||
1097 | /* false means that we don't stop the processing */ | |
1098 | return false; | |
1099 | } | |
1100 | ||
1101 | ||
1102 | static bool applyRulesToQuery(LocalHolders& holders, DNSQuestion& dq, const struct timespec& now) | |
e91084ce | 1103 | { |
4ab01344 | 1104 | g_rings.insertQuery(now, *dq.remote, *dq.qname, dq.qtype, dq.len, *dq.dh); |
e91084ce | 1105 | |
786e4d8c | 1106 | if(g_qcount.enabled) { |
348ef1c6 | 1107 | string qname = (*dq.qname).toLogString(); |
786e4d8c RS |
1108 | bool countQuery{true}; |
1109 | if(g_qcount.filter) { | |
1110 | std::lock_guard<std::mutex> lock(g_luamutex); | |
dd1a3034 | 1111 | std::tie (countQuery, qname) = g_qcount.filter(&dq); |
786e4d8c RS |
1112 | } |
1113 | ||
1114 | if(countQuery) { | |
1115 | WriteLock wl(&g_qcount.queryLock); | |
1116 | if(!g_qcount.records.count(qname)) { | |
1117 | g_qcount.records[qname] = 0; | |
1118 | } | |
1119 | g_qcount.records[qname]++; | |
1120 | } | |
1121 | } | |
1122 | ||
0beaa5c8 | 1123 | if(auto got = holders.dynNMGBlock->lookup(*dq.remote)) { |
701f690b | 1124 | auto updateBlockStats = [&got]() { |
cb167afd | 1125 | ++g_stats.dynBlocked; |
701f690b | 1126 | got->second.blocks++; |
1127 | }; | |
1128 | ||
e91084ce | 1129 | if(now < got->second.until) { |
7b925432 RG |
1130 | DNSAction::Action action = got->second.action; |
1131 | if (action == DNSAction::Action::None) { | |
1132 | action = g_dynBlockAction; | |
1133 | } | |
477c86a0 RG |
1134 | switch (action) { |
1135 | case DNSAction::Action::NoOp: | |
1136 | /* do nothing */ | |
1137 | break; | |
79ee8ff9 RG |
1138 | |
1139 | case DNSAction::Action::Nxdomain: | |
1140 | vinfolog("Query from %s turned into NXDomain because of dynamic block", dq.remote->toStringWithPort()); | |
1141 | updateBlockStats(); | |
1142 | ||
1143 | dq.dh->rcode = RCode::NXDomain; | |
1144 | dq.dh->qr=true; | |
1145 | return true; | |
1146 | ||
477c86a0 | 1147 | case DNSAction::Action::Refused: |
dd46e5e3 | 1148 | vinfolog("Query from %s refused because of dynamic block", dq.remote->toStringWithPort()); |
701f690b | 1149 | updateBlockStats(); |
8477236d | 1150 | |
dd46e5e3 | 1151 | dq.dh->rcode = RCode::Refused; |
79ee8ff9 | 1152 | dq.dh->qr = true; |
dd46e5e3 | 1153 | return true; |
477c86a0 RG |
1154 | |
1155 | case DNSAction::Action::Truncate: | |
8477236d | 1156 | if(!dq.tcp) { |
701f690b | 1157 | updateBlockStats(); |
8477236d | 1158 | vinfolog("Query from %s truncated because of dynamic block", dq.remote->toStringWithPort()); |
1159 | dq.dh->tc = true; | |
1160 | dq.dh->qr = true; | |
1161 | return true; | |
1162 | } | |
1163 | else { | |
348ef1c6 | 1164 | vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1165 | } |
477c86a0 | 1166 | break; |
3d60b39a | 1167 | case DNSAction::Action::NoRecurse: |
1168 | updateBlockStats(); | |
1169 | vinfolog("Query from %s setting rd=0 because of dynamic block", dq.remote->toStringWithPort()); | |
1170 | dq.dh->rd = false; | |
1171 | return true; | |
477c86a0 | 1172 | default: |
701f690b | 1173 | updateBlockStats(); |
dd46e5e3 RG |
1174 | vinfolog("Query from %s dropped because of dynamic block", dq.remote->toStringWithPort()); |
1175 | return false; | |
1176 | } | |
e91084ce RG |
1177 | } |
1178 | } | |
1179 | ||
0beaa5c8 | 1180 | if(auto got = holders.dynSMTBlock->lookup(*dq.qname)) { |
701f690b | 1181 | auto updateBlockStats = [&got]() { |
cb167afd | 1182 | ++g_stats.dynBlocked; |
701f690b | 1183 | got->blocks++; |
1184 | }; | |
1185 | ||
71c94675 | 1186 | if(now < got->until) { |
7b925432 RG |
1187 | DNSAction::Action action = got->action; |
1188 | if (action == DNSAction::Action::None) { | |
1189 | action = g_dynBlockAction; | |
1190 | } | |
477c86a0 RG |
1191 | switch (action) { |
1192 | case DNSAction::Action::NoOp: | |
1193 | /* do nothing */ | |
1194 | break; | |
79ee8ff9 | 1195 | case DNSAction::Action::Nxdomain: |
348ef1c6 | 1196 | vinfolog("Query from %s for %s turned into NXDomain because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
79ee8ff9 RG |
1197 | updateBlockStats(); |
1198 | ||
1199 | dq.dh->rcode = RCode::NXDomain; | |
1200 | dq.dh->qr=true; | |
1201 | return true; | |
477c86a0 | 1202 | case DNSAction::Action::Refused: |
348ef1c6 | 1203 | vinfolog("Query from %s for %s refused because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
701f690b | 1204 | updateBlockStats(); |
8477236d | 1205 | |
dd46e5e3 RG |
1206 | dq.dh->rcode = RCode::Refused; |
1207 | dq.dh->qr=true; | |
1208 | return true; | |
477c86a0 | 1209 | case DNSAction::Action::Truncate: |
8477236d | 1210 | if(!dq.tcp) { |
701f690b | 1211 | updateBlockStats(); |
8477236d | 1212 | |
348ef1c6 | 1213 | vinfolog("Query from %s for %s truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1214 | dq.dh->tc = true; |
1215 | dq.dh->qr = true; | |
1216 | return true; | |
1217 | } | |
1218 | else { | |
348ef1c6 | 1219 | vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
8477236d | 1220 | } |
477c86a0 | 1221 | break; |
3d60b39a | 1222 | case DNSAction::Action::NoRecurse: |
1223 | updateBlockStats(); | |
1224 | vinfolog("Query from %s setting rd=0 because of dynamic block", dq.remote->toStringWithPort()); | |
1225 | dq.dh->rd = false; | |
1226 | return true; | |
477c86a0 | 1227 | default: |
701f690b | 1228 | updateBlockStats(); |
348ef1c6 | 1229 | vinfolog("Query from %s for %s dropped because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toLogString()); |
dd46e5e3 RG |
1230 | return false; |
1231 | } | |
71c94675 | 1232 | } |
1233 | } | |
1234 | ||
e91084ce RG |
1235 | DNSAction::Action action=DNSAction::Action::None; |
1236 | string ruleresult; | |
2a28db86 | 1237 | bool drop = false; |
0beaa5c8 | 1238 | for(const auto& lr : *holders.rulactions) { |
4d5959e6 RG |
1239 | if(lr.d_rule->matches(&dq)) { |
1240 | lr.d_rule->d_matches++; | |
1241 | action=(*lr.d_action)(&dq, &ruleresult); | |
2a28db86 | 1242 | if (processRulesResult(action, dq, ruleresult, drop)) { |
3d60b39a | 1243 | break; |
e91084ce RG |
1244 | } |
1245 | } | |
1246 | } | |
1247 | ||
2a28db86 RG |
1248 | if (drop) { |
1249 | return false; | |
1250 | } | |
1251 | ||
e91084ce RG |
1252 | return true; |
1253 | } | |
1254 | ||
fbf14b03 | 1255 | ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck) |
fbe2a2e0 | 1256 | { |
b58f08e5 RG |
1257 | ssize_t result; |
1258 | ||
fbe2a2e0 | 1259 | if (ss->sourceItf == 0) { |
b58f08e5 RG |
1260 | result = send(sd, request, requestLen, 0); |
1261 | } | |
1262 | else { | |
1263 | struct msghdr msgh; | |
1264 | struct iovec iov; | |
7bec330a | 1265 | cmsgbuf_aligned cbuf; |
a2353842 | 1266 | ComboAddress remote(ss->remote); |
7bec330a OM |
1267 | fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), const_cast<char*>(request), requestLen, &remote); |
1268 | addCMsgSrcAddr(&msgh, &cbuf, &ss->sourceAddr, ss->sourceItf); | |
b58f08e5 | 1269 | result = sendmsg(sd, &msgh, 0); |
fbe2a2e0 RG |
1270 | } |
1271 | ||
b58f08e5 RG |
1272 | if (result == -1) { |
1273 | int savederrno = errno; | |
1274 | vinfolog("Error sending request to backend %s: %d", ss->remote.toStringWithPort(), savederrno); | |
1275 | ||
1276 | /* This might sound silly, but on Linux send() might fail with EINVAL | |
1b126225 RG |
1277 | if the interface the socket was bound to doesn't exist anymore. |
1278 | We don't want to reconnect the real socket if the healthcheck failed, | |
1279 | because it's not using the same socket. | |
1280 | */ | |
1281 | if (!healthCheck && (savederrno == EINVAL || savederrno == ENODEV)) { | |
b58f08e5 RG |
1282 | ss->reconnect(); |
1283 | } | |
fbe2a2e0 RG |
1284 | } |
1285 | ||
b58f08e5 | 1286 | return result; |
fbe2a2e0 RG |
1287 | } |
1288 | ||
0beaa5c8 | 1289 | static bool isUDPQueryAcceptable(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest) |
24d5cb00 | 1290 | { |
0beaa5c8 RG |
1291 | if (msgh->msg_flags & MSG_TRUNC) { |
1292 | /* message was too large for our buffer */ | |
1293 | vinfolog("Dropping message too large for our buffer"); | |
cb167afd | 1294 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1295 | return false; |
1296 | } | |
a4652d55 | 1297 | |
0beaa5c8 RG |
1298 | if(!holders.acl->match(remote)) { |
1299 | vinfolog("Query from %s dropped because of ACL", remote.toStringWithPort()); | |
cb167afd | 1300 | ++g_stats.aclDrops; |
0beaa5c8 | 1301 | return false; |
2b3eefc3 | 1302 | } |
2b3eefc3 | 1303 | |
0beaa5c8 | 1304 | cs.queries++; |
cb167afd | 1305 | ++g_stats.queries; |
2b3eefc3 | 1306 | |
0beaa5c8 RG |
1307 | if (HarvestDestinationAddress(msgh, &dest)) { |
1308 | /* we don't get the port, only the address */ | |
1309 | dest.sin4.sin_port = cs.local.sin4.sin_port; | |
1310 | } | |
1311 | else { | |
1312 | dest.sin4.sin_family = 0; | |
2b3eefc3 | 1313 | } |
549d63c9 | 1314 | |
0beaa5c8 RG |
1315 | return true; |
1316 | } | |
1317 | ||
4ab01344 | 1318 | boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp) |
0beaa5c8 RG |
1319 | { |
1320 | if (cs.dnscryptCtx) { | |
7129b5c4 | 1321 | #ifdef HAVE_DNSCRYPT |
0beaa5c8 RG |
1322 | vector<uint8_t> response; |
1323 | uint16_t decryptedQueryLen = 0; | |
2b3eefc3 | 1324 | |
43234e76 | 1325 | dnsCryptQuery = std::make_shared<DNSCryptQuery>(cs.dnscryptCtx); |
0beaa5c8 | 1326 | |
4ab01344 | 1327 | bool decrypted = handleDNSCryptQuery(const_cast<char*>(query), len, dnsCryptQuery, &decryptedQueryLen, tcp, now, response); |
0beaa5c8 RG |
1328 | |
1329 | if (!decrypted) { | |
1330 | if (response.size() > 0) { | |
4ab01344 | 1331 | return response; |
2b3eefc3 | 1332 | } |
4ab01344 | 1333 | throw std::runtime_error("Unable to decrypt DNSCrypt query, dropping."); |
0beaa5c8 | 1334 | } |
2b3eefc3 | 1335 | |
0beaa5c8 | 1336 | len = decryptedQueryLen; |
7129b5c4 | 1337 | #endif /* HAVE_DNSCRYPT */ |
0beaa5c8 | 1338 | } |
4ab01344 | 1339 | return boost::none; |
0beaa5c8 | 1340 | } |
2b3eefc3 | 1341 | |
0beaa5c8 RG |
1342 | bool checkQueryHeaders(const struct dnsheader* dh) |
1343 | { | |
1344 | if (dh->qr) { // don't respond to responses | |
cb167afd | 1345 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1346 | return false; |
1347 | } | |
2b3eefc3 | 1348 | |
0beaa5c8 | 1349 | if (dh->qdcount == 0) { |
cb167afd | 1350 | ++g_stats.emptyQueries; |
0beaa5c8 RG |
1351 | return false; |
1352 | } | |
0ba5eecf | 1353 | |
0beaa5c8 | 1354 | if (dh->rd) { |
cb167afd | 1355 | ++g_stats.rdQueries; |
0beaa5c8 | 1356 | } |
e91084ce | 1357 | |
0beaa5c8 RG |
1358 | return true; |
1359 | } | |
963bef8d | 1360 | |
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* Prepares outMsg so that the response gets sent to remote.

   If the address the query was sent to is known (dest has a family), it is
   attached as ancillary data using cbuf, so that the response is sent from
   that address. Otherwise no ancillary data is sent at all. */
static void queueResponse(const ClientState& cs, const char* response, uint16_t responseLen, const ComboAddress& dest, const ComboAddress& remote, struct mmsghdr& outMsg, struct iovec* iov, cmsgbuf_aligned* cbuf)
{
  outMsg.msg_len = 0;
  fillMSGHdr(&outMsg.msg_hdr, iov, nullptr, 0, const_cast<char*>(response), responseLen, const_cast<ComboAddress*>(&remote));

  const bool destinationKnown = (dest.sin4.sin_family != 0);
  if (destinationKnown) {
    addCMsgSrcAddr(&outMsg.msg_hdr, cbuf, &dest, 0);
  }
  else {
    outMsg.msg_hdr.msg_control = nullptr;
  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
43eeadc1 | 1375 | |
3e425868 RG |
1376 | /* self-generated responses or cache hits */ |
1377 | static bool prepareOutgoingResponse(LocalHolders& holders, ClientState& cs, DNSQuestion& dq, bool cacheHit) | |
54aaa82b | 1378 | { |
3e425868 | 1379 | DNSResponse dr(dq.qname, dq.qtype, dq.qclass, dq.consumed, dq.local, dq.remote, reinterpret_cast<dnsheader*>(dq.dh), dq.size, dq.len, dq.tcp, dq.queryTime); |
4ab01344 | 1380 | |
54aaa82b | 1381 | #ifdef HAVE_PROTOBUF |
1382 | dr.uniqueId = dq.uniqueId; | |
1383 | #endif | |
1384 | dr.qTag = dq.qTag; | |
3e425868 | 1385 | dr.delayMsec = dq.delayMsec; |
54aaa82b | 1386 | |
3e425868 RG |
1387 | if (!applyRulesToResponse(cacheHit ? holders.cacheHitRespRulactions : holders.selfAnsweredRespRulactions, dr)) { |
1388 | return false; | |
54aaa82b | 1389 | } |
1390 | ||
3e425868 RG |
1391 | /* in case a rule changed it */ |
1392 | dq.delayMsec = dr.delayMsec; | |
1393 | ||
54aaa82b | 1394 | #ifdef HAVE_DNSCRYPT |
7129b5c4 | 1395 | if (!cs.muted) { |
3e425868 RG |
1396 | if (!encryptResponse(reinterpret_cast<char*>(dq.dh), &dq.len, dq.size, dq.tcp, dq.dnsCryptQuery, nullptr, nullptr)) { |
1397 | return false; | |
54aaa82b | 1398 | } |
54aaa82b | 1399 | } |
7129b5c4 | 1400 | #endif /* HAVE_DNSCRYPT */ |
54aaa82b | 1401 | |
389d903a RG |
1402 | if (cacheHit) { |
1403 | ++g_stats.cacheHits; | |
1404 | } | |
3e425868 | 1405 | |
61d10a4d MH |
1406 | switch (dr.dh->rcode) { |
1407 | case RCode::NXDomain: | |
1408 | ++g_stats.frontendNXDomain; | |
1409 | break; | |
1410 | case RCode::ServFail: | |
1411 | ++g_stats.frontendServFail; | |
1412 | break; | |
1413 | case RCode::NoError: | |
1414 | ++g_stats.frontendNoError; | |
1415 | break; | |
1416 | } | |
3e425868 | 1417 | |
54aaa82b | 1418 | doLatencyStats(0); // we're not going to measure this |
3e425868 | 1419 | return true; |
54aaa82b | 1420 | } |
1421 | ||
3e425868 | 1422 | ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend) |
0beaa5c8 | 1423 | { |
4ab01344 | 1424 | const uint16_t queryId = ntohs(dq.dh->id); |
2b3eefc3 | 1425 | |
0beaa5c8 | 1426 | try { |
43234e76 RG |
1427 | /* we need an accurate ("real") value for the response and |
1428 | to store into the IDS, but not for insertion into the | |
1429 | rings for example */ | |
43234e76 RG |
1430 | struct timespec now; |
1431 | gettime(&now); | |
2efd427d | 1432 | |
2a28db86 | 1433 | if (!applyRulesToQuery(holders, dq, now)) { |
3e425868 | 1434 | return ProcessQueryResult::Drop; |
0beaa5c8 | 1435 | } |
b63add03 | 1436 | |
0beaa5c8 | 1437 | if(dq.dh->qr) { // something turned it into a response |
4ab01344 | 1438 | fixUpQueryTurnedResponse(dq, dq.origFlags); |
0beaa5c8 | 1439 | |
3e425868 RG |
1440 | if (!prepareOutgoingResponse(holders, cs, dq, false)) { |
1441 | return ProcessQueryResult::Drop; | |
22b2b326 | 1442 | } |
5f504638 | 1443 | |
3e425868 RG |
1444 | ++g_stats.selfAnswered; |
1445 | return ProcessQueryResult::SendAnswer; | |
0beaa5c8 RG |
1446 | } |
1447 | ||
2a28db86 | 1448 | std::shared_ptr<ServerPool> serverPool = getPool(*holders.pools, dq.poolname); |
4ab01344 | 1449 | dq.packetCache = serverPool->packetCache; |
a1b1a29d | 1450 | auto policy = *(holders.policy); |
0beaa5c8 | 1451 | if (serverPool->policy != nullptr) { |
a1b1a29d | 1452 | policy = *(serverPool->policy); |
0beaa5c8 | 1453 | } |
a1b1a29d RG |
1454 | auto servers = serverPool->getServers(); |
1455 | if (policy.isLua) { | |
0beaa5c8 | 1456 | std::lock_guard<std::mutex> lock(g_luamutex); |
3e425868 | 1457 | selectedBackend = policy.policy(servers, &dq); |
a1b1a29d RG |
1458 | } |
1459 | else { | |
3e425868 | 1460 | selectedBackend = policy.policy(servers, &dq); |
0beaa5c8 | 1461 | } |
228e4fe8 | 1462 | |
9837850d | 1463 | uint16_t cachedResponseSize = dq.size; |
3e425868 | 1464 | uint32_t allowExpired = selectedBackend ? 0 : g_staleCacheEntriesTTL; |
9837850d | 1465 | |
4ab01344 RG |
1466 | if (dq.packetCache && !dq.skipCache) { |
1467 | dq.dnssecOK = (getEDNSZ(dq) & EDNS_HEADER_FLAG_DO); | |
1ef18cab | 1468 | } |
1469 | ||
3e425868 | 1470 | if (dq.useECS && ((selectedBackend && selectedBackend->useECS) || (!selectedBackend && serverPool->getECS()))) { |
389d903a | 1471 | // we special case our cache in case a downstream explicitly gave us a universally valid response with a 0 scope |
3e425868 | 1472 | if (dq.packetCache && !dq.skipCache && (!selectedBackend || !selectedBackend->disableZeroScope) && dq.packetCache->isECSParsingEnabled()) { |
4ab01344 | 1473 | if (dq.packetCache->get(dq, dq.consumed, dq.dh->id, reinterpret_cast<char*>(dq.dh), &cachedResponseSize, &dq.cacheKeyNoECS, dq.subnet, dq.dnssecOK, allowExpired)) { |
3e425868 RG |
1474 | dq.len = cachedResponseSize; |
1475 | ||
1476 | if (!prepareOutgoingResponse(holders, cs, dq, true)) { | |
1477 | return ProcessQueryResult::Drop; | |
1478 | } | |
1479 | ||
1480 | return ProcessQueryResult::SendAnswer; | |
389d903a RG |
1481 | } |
1482 | ||
4ab01344 | 1483 | if (!dq.subnet) { |
389d903a | 1484 | /* there was no existing ECS on the query, enable the zero-scope feature */ |
4ab01344 | 1485 | dq.useZeroScope = true; |
389d903a | 1486 | } |
9837850d | 1487 | } |
389d903a | 1488 | |
4ab01344 RG |
1489 | if (!handleEDNSClientSubnet(dq, &(dq.ednsAdded), &(dq.ecsAdded), g_preserveTrailingData)) { |
1490 | vinfolog("Dropping query from %s because we couldn't insert the ECS value", dq.remote->toStringWithPort()); | |
3e425868 | 1491 | return ProcessQueryResult::Drop; |
886e2cf2 | 1492 | } |
0beaa5c8 | 1493 | } |
886e2cf2 | 1494 | |
4ab01344 RG |
1495 | if (dq.packetCache && !dq.skipCache) { |
1496 | if (dq.packetCache->get(dq, dq.consumed, dq.dh->id, reinterpret_cast<char*>(dq.dh), &cachedResponseSize, &dq.cacheKey, dq.subnet, dq.dnssecOK, allowExpired)) { | |
3e425868 RG |
1497 | dq.len = cachedResponseSize; |
1498 | ||
1499 | if (!prepareOutgoingResponse(holders, cs, dq, true)) { | |
1500 | return ProcessQueryResult::Drop; | |
1501 | } | |
1502 | ||
1503 | return ProcessQueryResult::SendAnswer; | |
886e2cf2 | 1504 | } |
cb167afd | 1505 | ++g_stats.cacheMisses; |
0beaa5c8 | 1506 | } |
886e2cf2 | 1507 | |
3e425868 | 1508 | if(!selectedBackend) { |
cb167afd | 1509 | ++g_stats.noPolicy; |
26a3cdb7 | 1510 | |
348ef1c6 | 1511 | vinfolog("%s query for %s|%s from %s, no policy applied", g_servFailOnNoPolicy ? "ServFailed" : "Dropped", dq.qname->toLogString(), QType(dq.qtype).getName(), dq.remote->toStringWithPort()); |
3e425868 | 1512 | if (g_servFailOnNoPolicy) { |
4ab01344 | 1513 | restoreFlags(dq.dh, dq.origFlags); |
26a3cdb7 | 1514 | |
0beaa5c8 RG |
1515 | dq.dh->rcode = RCode::ServFail; |
1516 | dq.dh->qr = true; | |
26a3cdb7 | 1517 | |
3e425868 RG |
1518 | if (!prepareOutgoingResponse(holders, cs, dq, false)) { |
1519 | return ProcessQueryResult::Drop; | |
1520 | } | |
be6c318f | 1521 | // no response-only statistics counter to update. |
3e425868 | 1522 | return ProcessQueryResult::SendAnswer; |
1ea747c0 | 1523 | } |
3e425868 RG |
1524 | |
1525 | return ProcessQueryResult::Drop; | |
0beaa5c8 | 1526 | } |
1ea747c0 | 1527 | |
3e425868 RG |
1528 | if (dq.addXPF && selectedBackend->xpfRRCode != 0) { |
1529 | addXPF(dq, selectedBackend->xpfRRCode, g_preserveTrailingData); | |
5cc8371b RG |
1530 | } |
1531 | ||
3e425868 RG |
1532 | selectedBackend->queries++; |
1533 | return ProcessQueryResult::PassToBackend; | |
4ab01344 RG |
1534 | } |
1535 | catch(const std::exception& e){ | |
1536 | vinfolog("Got an error while parsing a %s query from %s, id %d: %s", (dq.tcp ? "TCP" : "UDP"), dq.remote->toStringWithPort(), queryId, e.what()); | |
4ab01344 | 1537 | } |
3e425868 | 1538 | return ProcessQueryResult::Drop; |
4ab01344 RG |
1539 | } |
1540 | ||
/* Process one UDP query received on the frontend 'cs'.

   Parameters:
   - cs, holders: frontend state and per-thread holders, forwarded to the helpers called below.
   - msgh, remote, dest: message header, sender address and destination address of the datagram.
     'dest' is passed by non-const reference to isUDPQueryAcceptable(); a family of 0 is treated
     below as "destination address not harvested".
   - query, len, queryBufferSize: the buffer holding the query, the length of the query and the
     total size of the buffer. The buffer is reused for the answer / the relayed query.
   - responsesVect, queuedResponses, respIOV, respCBuf: either all null, or all set (see the
     assert). When set and the answer can be sent immediately, the response is queued into
     responsesVect[*queuedResponses] and the counter is incremented instead of being sent right away.

   Outcome, depending on processQuery():
   - Drop: nothing is sent.
   - SendAnswer: the content of dq.dh is sent back (or queued, see above).
   - PassToBackend: an IDState slot of the selected backend is claimed, the original ID and
     addresses are recorded in it, the DNS ID in the packet is replaced by the slot index and the
     query is sent to the backend.

   Exceptions derived from std::exception are caught and logged, nothing else is caught here. */
static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest, char* query, uint16_t len, size_t queryBufferSize, struct mmsghdr* responsesVect, unsigned int* queuedResponses, struct iovec* respIOV, cmsgbuf_aligned* respCBuf)
{
  assert(responsesVect == nullptr || (queuedResponses != nullptr && respIOV != nullptr && respCBuf != nullptr));
  /* only used for logging in the exception handler, stays 0 until the header has been read */
  uint16_t queryId = 0;

  try {
    if (!isUDPQueryAcceptable(cs, holders, msgh, remote, dest)) {
      return;
    }

    /* we need an accurate ("real") value for the response and
       to store into the IDS, but not for insertion into the
       rings for example */
    struct timespec queryRealTime;
    gettime(&queryRealTime, true);

    /* if checkDNSCryptQuery() hands us a response, it is sent as is and we are done */
    std::shared_ptr<DNSCryptQuery> dnsCryptQuery = nullptr;
    auto dnsCryptResponse = checkDNSCryptQuery(cs, query, len, dnsCryptQuery, queryRealTime.tv_sec, false);
    if (dnsCryptResponse) {
      sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(dnsCryptResponse->data()), static_cast<uint16_t>(dnsCryptResponse->size()), 0, dest, remote);
      return;
    }

    struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(query);
    queryId = ntohs(dh->id);

    if (!checkQueryHeaders(dh)) {
      return;
    }

    uint16_t qtype, qclass;
    unsigned int consumed = 0;
    DNSName qname(query, len, sizeof(dnsheader), false, &qtype, &qclass, &consumed);
    /* the local address is the harvested destination if we have one, the listening address otherwise */
    DNSQuestion dq(&qname, qtype, qclass, consumed, dest.sin4.sin_family != 0 ? &dest : &cs.local, &remote, dh, queryBufferSize, len, false, &queryRealTime);
    dq.dnsCryptQuery = std::move(dnsCryptQuery);
    /* 'ss' is handed to processQuery() and must be set before we can relay the query */
    std::shared_ptr<DownstreamState> ss{nullptr};
    auto result = processQuery(dq, cs, holders, ss);

    if (result == ProcessQueryResult::Drop) {
      return;
    }

    if (result == ProcessQueryResult::SendAnswer) {
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
      /* not delayed and the caller provided a response vector: queue it, the caller will send the batch */
      if (dq.delayMsec == 0 && responsesVect != nullptr) {
        queueResponse(cs, reinterpret_cast<char*>(dq.dh), dq.len, *dq.local, *dq.remote, responsesVect[*queuedResponses], respIOV, respCBuf);
        (*queuedResponses)++;
        return;
      }
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
      /* we use dest, always, because we don't want to use the listening address to send a response since it could be 0.0.0.0 */
      sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(dq.dh), dq.len, dq.delayMsec, dest, *dq.remote);
      return;
    }

    if (result != ProcessQueryResult::PassToBackend || ss == nullptr) {
      return;
    }

    /* pick the next IDState slot of that backend, wrapping around the size of the table */
    unsigned int idOffset = (ss->idOffset++) % ss->idStates.size();
    IDState* ids = &ss->idStates[idOffset];
    ids->age = 0;
    DOHUnit* du = nullptr;

    /* that means that the state was in use, possibly with an allocated
       DOHUnit that we will need to handle, but we can't touch it before
       confirming that we now own this state */
    if (ids->isInUse()) {
      du = ids->du;
    }

    /* we atomically replace the value, we now own this state */
    if (!ids->markAsUsed()) {
      /* the state was not in use.
         we reset 'du' because it might have still been in use when we read it. */
      du = nullptr;
      ++ss->outstanding;
    }
    else {
      /* we are reusing a state, no change in outstanding but if there was an existing DOHUnit we need
         to handle it because it's about to be overwritten. */
      ids->du = nullptr;
      ++ss->reuseds;
      ++g_stats.downstreamTimeouts;
      handleDOHTimeout(du);
    }

    /* record where the query came from, so that the response can be matched and sent back */
    ids->cs = &cs;
    ids->origFD = cs.udpFD;
    ids->origID = dh->id;
    /* note that 'qname' is moved from here, use ids->qname from now on */
    setIDStateFromDNSQuestion(*ids, dq, std::move(qname));

    /* If we couldn't harvest the real dest addr, still
       write down the listening addr since it will be useful
       (especially if it's not an 'any' one).
       We need to keep track of which one it is since we may
       want to use the real but not the listening addr to reply.
    */
    if (dest.sin4.sin_family != 0) {
      ids->origDest = dest;
      ids->destHarvested = true;
    }
    else {
      ids->origDest = cs.local;
      ids->destHarvested = false;
    }

    /* the ID sent to the backend is the index of the slot, the original one was saved in origID above */
    dh->id = idOffset;

    int fd = pickBackendSocketForSending(ss);
    ssize_t ret = udpClientSendRequestToBackend(ss, fd, query, dq.len);

    /* a failure to send is only counted, the state is not released here */
    if(ret < 0) {
      ++ss->sendErrors;
      ++g_stats.downstreamSendErrors;
    }

    vinfolog("Got query for %s|%s from %s, relayed to %s", ids->qname.toLogString(), QType(ids->qtype).getName(), remote.toStringWithPort(), ss->getName());
  }
  catch(const std::exception& e){
    vinfolog("Got an error in UDP question thread while parsing a query from %s, id %d: %s", remote.toStringWithPort(), queryId, e.what());
  }
}
1664 | ||
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* UDP frontend loop used when several messages are handled per system call:
   batches of datagrams are read with recvmmsg(), each one is handed to
   processUDPQuery(), and the responses that function queued are then sent
   in one go with sendmmsg(). This function never returns. */
static void MultipleMessagesUDPClientThread(ClientState* cs, LocalHolders& holders)
{
  /* everything needed to receive one datagram, and to queue its response */
  struct MMReceiver
  {
    char packet[4096];
    ComboAddress remote;
    ComboAddress dest;
    struct iovec iov;
    /* used by HarvestDestinationAddress */
    cmsgbuf_aligned cbuf;
  };
  /* the actual buffer is larger because:
     - we may have to add EDNS and/or ECS
     - we use it for self-generated responses (from rule or cache)
     but we only accept incoming payloads up to that size
  */
  static_assert(s_udpIncomingBufferSize <= sizeof(MMReceiver::packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)");

  const size_t vectSize = g_udpVectorSize;

  std::unique_ptr<MMReceiver[]> slots(new MMReceiver[vectSize]);
  std::unique_ptr<struct mmsghdr[]> incoming(new struct mmsghdr[vectSize]);
  std::unique_ptr<struct mmsghdr[]> outgoing(new struct mmsghdr[vectSize]);

  /* prepare one message header per slot before receiving anything */
  for (size_t idx = 0; idx < vectSize; idx++) {
    auto& slot = slots[idx];
    slot.remote.sin4.sin_family = cs->local.sin4.sin_family;
    fillMSGHdr(&incoming[idx].msg_hdr, &slot.iov, &slot.cbuf, sizeof(slot.cbuf), slot.packet, s_udpIncomingBufferSize, &slot.remote);
  }

  while (true) {

    /* the IO vectors are shared with the vector of responses (no copy of
       the data), so they have to be pointed back to the receive buffers
       before every read */
    for (size_t idx = 0; idx < vectSize; idx++) {
      auto& slot = slots[idx];
      slot.iov.iov_base = slot.packet;
      slot.iov.iov_len = sizeof(slot.packet);
    }

    /* wait for at least one message, but grab as many as are available
       to spare system calls */
    const int msgsGot = recvmmsg(cs->udpFD, incoming.get(), vectSize, MSG_WAITFORONE | MSG_TRUNC, nullptr);

    if (msgsGot <= 0) {
      vinfolog("Getting UDP messages via recvmmsg() failed with: %s", strerror(errno));
      continue;
    }

    unsigned int msgsToSend = 0;

    /* handle every message we got, one at a time */
    for (int msgIdx = 0; msgIdx < msgsGot; msgIdx++) {
      auto& slot = slots[msgIdx];
      const unsigned int got = incoming[msgIdx].msg_len;

      /* too short to even hold a DNS header */
      if (static_cast<size_t>(got) < sizeof(struct dnsheader)) {
        ++g_stats.nonCompliantQueries;
        continue;
      }

      processUDPQuery(*cs, holders, &incoming[msgIdx].msg_hdr, slot.remote, slot.dest, slot.packet, static_cast<uint16_t>(got), sizeof(slot.packet), outgoing.get(), &msgsToSend, &slot.iov, &slot.cbuf);
    }

    /* responses that did not need to be delayed or relayed to a backend
       (mostly from a rule, a dynamic block or the cache) are sent as a batch */
    if (msgsToSend == 0 || msgsToSend > static_cast<unsigned int>(msgsGot)) {
      continue;
    }

    const int sent = sendmmsg(cs->udpFD, outgoing.get(), msgsToSend, 0);
    if (sent < 0 || static_cast<unsigned int>(sent) != msgsToSend) {
      vinfolog("Error sending responses with sendmmsg() (%d on %u): %s", sent, msgsToSend, strerror(errno));
    }
  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
1745 | ||
1746 | // listens to incoming queries, sends out to downstream servers, noting the intended return path | |
9b73b71c | 1747 | static void udpClientThread(ClientState* cs) |
0beaa5c8 RG |
1748 | try |
1749 | { | |
519f5484 | 1750 | setThreadName("dnsdist/udpClie"); |
0beaa5c8 RG |
1751 | LocalHolders holders; |
1752 | ||
1753 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) | |
1754 | if (g_udpVectorSize > 1) { | |
1755 | MultipleMessagesUDPClientThread(cs, holders); | |
1756 | ||
1757 | } | |
1758 | else | |
1759 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
1760 | { | |
1761 | char packet[4096]; | |
1762 | /* the actual buffer is larger because: | |
1763 | - we may have to add EDNS and/or ECS | |
1764 | - we use it for self-generated responses (from rule or cache) | |
1765 | but we only accept incoming payloads up to that size | |
1766 | */ | |
1767 | static_assert(s_udpIncomingBufferSize <= sizeof(packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)"); | |
1768 | struct msghdr msgh; | |
1769 | struct iovec iov; | |
1770 | /* used by HarvestDestinationAddress */ | |
7bec330a | 1771 | cmsgbuf_aligned cbuf; |
0beaa5c8 RG |
1772 | |
1773 | ComboAddress remote; | |
1774 | ComboAddress dest; | |
1775 | remote.sin4.sin_family = cs->local.sin4.sin_family; | |
7bec330a | 1776 | fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), packet, sizeof(packet), &remote); |
0beaa5c8 RG |
1777 | |
1778 | for(;;) { | |
1779 | ssize_t got = recvmsg(cs->udpFD, &msgh, 0); | |
1780 | ||
1781 | if (got < 0 || static_cast<size_t>(got) < sizeof(struct dnsheader)) { | |
cb167afd | 1782 | ++g_stats.nonCompliantQueries; |
0beaa5c8 RG |
1783 | continue; |
1784 | } | |
1785 | ||
1786 | processUDPQuery(*cs, holders, &msgh, remote, dest, packet, static_cast<uint16_t>(got), s_udpIncomingBufferSize, nullptr, nullptr, nullptr, nullptr); | |
1787 | } | |
1788 | } | |
24d5cb00 | 1789 | } |
2b3eefc3 | 1790 | catch(const std::exception &e) |
a4652d55 | 1791 | { |
1792 | errlog("UDP client thread died because of exception: %s", e.what()); | |
a4652d55 | 1793 | } |
2b3eefc3 | 1794 | catch(const PDNSException &e) |
a4652d55 | 1795 | { |
1796 | errlog("UDP client thread died because of PowerDNS exception: %s", e.reason); | |
a4652d55 | 1797 | } |
1798 | catch(...) | |
1799 | { | |
1800 | errlog("UDP client thread died because of an exception: %s", "unknown"); | |
a4652d55 | 1801 | } |
24d5cb00 | 1802 | |
555970c9 RG |
/* Return a value to be used as the ID of a DNS message, taken from libsodium's
   randombytes_random() when libsodium is available and from random() otherwise. */
uint16_t getRandomDNSID()
{
#ifdef HAVE_LIBSODIUM
  const auto raw = randombytes_random();
#else
  const auto raw = random();
#endif
  /* reduce the value to the range of a 16-bit ID */
  return (raw % 65536);
}
1811 | ||
4ab01344 | 1812 | static bool upCheck(const shared_ptr<DownstreamState>& ds) |
773470ca | 1813 | try |
1814 | { | |
4ab01344 RG |
1815 | DNSName checkName = ds->checkName; |
1816 | uint16_t checkType = ds->checkType.getCode(); | |
1817 | uint16_t checkClass = ds->checkClass; | |
98650fde RG |
1818 | dnsheader checkHeader; |
1819 | memset(&checkHeader, 0, sizeof(checkHeader)); | |
1820 | ||
1821 | checkHeader.qdcount = htons(1); | |
555970c9 | 1822 | checkHeader.id = getRandomDNSID(); |
98650fde RG |
1823 | |
1824 | checkHeader.rd = true; | |
4ab01344 | 1825 | if (ds->setCD) { |
98650fde RG |
1826 | checkHeader.cd = true; |
1827 | } | |
1828 | ||
4ab01344 | 1829 | if (ds->checkFunction) { |
98650fde | 1830 | std::lock_guard<std::mutex> lock(g_luamutex); |
4ab01344 | 1831 | auto ret = ds->checkFunction(checkName, checkType, checkClass, &checkHeader); |
98650fde RG |
1832 | checkName = std::get<0>(ret); |
1833 | checkType = std::get<1>(ret); | |
1834 | checkClass = std::get<2>(ret); | |
21830638 | 1835 | } |
773470ca | 1836 | |
98650fde RG |
1837 | vector<uint8_t> packet; |
1838 | DNSPacketWriter dpw(packet, checkName, checkType, checkClass); | |
1839 | dnsheader * requestHeader = dpw.getHeader(); | |
1840 | *requestHeader = checkHeader; | |
1841 | ||
4ab01344 | 1842 | Socket sock(ds->remote.sin4.sin_family, SOCK_DGRAM); |
773470ca | 1843 | sock.setNonBlocking(); |
4ab01344 | 1844 | if (!IsAnyAddress(ds->sourceAddr)) { |
fbe2a2e0 | 1845 | sock.setReuseAddr(); |
4ab01344 | 1846 | sock.bind(ds->sourceAddr); |
fbe2a2e0 | 1847 | } |
4ab01344 | 1848 | sock.connect(ds->remote); |
3e425868 | 1849 | ssize_t sent = udpClientSendRequestToBackend(ds, sock.getHandle(), reinterpret_cast<char*>(&packet[0]), packet.size(), true); |
9e87dcb8 RG |
1850 | if (sent < 0) { |
1851 | int ret = errno; | |
1852 | if (g_verboseHealthChecks) | |
4ab01344 | 1853 | infolog("Error while sending a health check query to backend %s: %d", ds->getNameWithAddr(), ret); |
fbe2a2e0 | 1854 | return false; |
9e87dcb8 | 1855 | } |
fbe2a2e0 | 1856 | |
4ab01344 | 1857 | int ret = waitForRWData(sock.getHandle(), true, /* ms to seconds */ ds->checkTimeout / 1000, /* remaining ms to us */ (ds->checkTimeout % 1000) * 1000); |
9e87dcb8 RG |
1858 | if(ret < 0 || !ret) { // error, timeout, both are down! |
1859 | if (ret < 0) { | |
1860 | ret = errno; | |
1861 | if (g_verboseHealthChecks) | |
4ab01344 | 1862 | infolog("Error while waiting for the health check response from backend %s: %d", ds->getNameWithAddr(), ret); |
9e87dcb8 RG |
1863 | } |
1864 | else { | |
1865 | if (g_verboseHealthChecks) | |
4ab01344 | 1866 | infolog("Timeout while waiting for the health check response from backend %s", ds->getNameWithAddr()); |
9e87dcb8 | 1867 | } |
773470ca | 1868 | return false; |
9e87dcb8 RG |
1869 | } |
1870 | ||
773470ca | 1871 | string reply; |
a2353842 RG |
1872 | ComboAddress from; |
1873 | sock.recvFrom(reply, from); | |
1874 | ||
1875 | /* we are using a connected socket but hey.. */ | |
4ab01344 | 1876 | if (from != ds->remote) { |
a2353842 | 1877 | if (g_verboseHealthChecks) |
4ab01344 | 1878 | infolog("Invalid health check response received from %s, expecting one from %s", from.toStringWithPort(), ds->remote.toStringWithPort()); |
a2353842 RG |
1879 | return false; |
1880 | } | |
773470ca | 1881 | |
98650fde | 1882 | const dnsheader * responseHeader = reinterpret_cast<const dnsheader *>(reply.c_str()); |
fc01e0b1 | 1883 | |
9e87dcb8 RG |
1884 | if (reply.size() < sizeof(*responseHeader)) { |
1885 | if (g_verboseHealthChecks) | |
4ab01344 | 1886 | infolog("Invalid health check response of size %d from backend %s, expecting at least %d", reply.size(), ds->getNameWithAddr(), sizeof(*responseHeader)); |
fc01e0b1 | 1887 | return false; |
9e87dcb8 | 1888 | } |
fc01e0b1 | 1889 | |
9e87dcb8 RG |
1890 | if (responseHeader->id != requestHeader->id) { |
1891 | if (g_verboseHealthChecks) | |
4ab01344 | 1892 | infolog("Invalid health check response id %d from backend %s, expecting %d", responseHeader->id, ds->getNameWithAddr(), requestHeader->id); |
fc01e0b1 | 1893 | return false; |
9e87dcb8 RG |
1894 | } |
1895 | ||
1896 | if (!responseHeader->qr) { | |
1897 | if (g_verboseHealthChecks) | |
4ab01344 | 1898 | infolog("Invalid health check response from backend %s, expecting QR to be set", ds->getNameWithAddr()); |
fc01e0b1 | 1899 | return false; |
9e87dcb8 RG |
1900 | } |
1901 | ||
1902 | if (responseHeader->rcode == RCode::ServFail) { | |
1903 | if (g_verboseHealthChecks) | |
4ab01344 | 1904 | infolog("Backend %s responded to health check with ServFail", ds->getNameWithAddr()); |
fc01e0b1 | 1905 | return false; |
9e87dcb8 RG |
1906 | } |
1907 | ||
4ab01344 | 1908 | if (ds->mustResolve && (responseHeader->rcode == RCode::NXDomain || responseHeader->rcode == RCode::Refused)) { |
9e87dcb8 | 1909 | if (g_verboseHealthChecks) |
4ab01344 | 1910 | infolog("Backend %s responded to health check with %s while mustResolve is set", ds->getNameWithAddr(), responseHeader->rcode == RCode::NXDomain ? "NXDomain" : "Refused"); |
a6e02424 | 1911 | return false; |
9e87dcb8 | 1912 | } |
fc01e0b1 | 1913 | |
98650fde RG |
1914 | uint16_t receivedType; |
1915 | uint16_t receivedClass; | |
1916 | DNSName receivedName(reply.c_str(), reply.size(), sizeof(dnsheader), false, &receivedType, &receivedClass); | |
1917 | ||
1918 | if (receivedName != checkName || receivedType != checkType || receivedClass != checkClass) { | |
1919 | if (g_verboseHealthChecks) | |
4ab01344 | 1920 | infolog("Backend %s responded to health check with an invalid qname (%s vs %s), qtype (%s vs %s) or qclass (%d vs %d)", ds->getNameWithAddr(), receivedName.toLogString(), checkName.toLogString(), QType(receivedType).getName(), QType(checkType).getName(), receivedClass, checkClass); |
98650fde RG |
1921 | return false; |
1922 | } | |
1923 | ||
773470ca | 1924 | return true; |
1925 | } | |
9e87dcb8 RG |
1926 | catch(const std::exception& e) |
1927 | { | |
1928 | if (g_verboseHealthChecks) | |
4ab01344 | 1929 | infolog("Error checking the health of backend %s: %s", ds->getNameWithAddr(), e.what()); |
9e87dcb8 RG |
1930 | return false; |
1931 | } | |
773470ca | 1932 | catch(...) |
1933 | { | |
9e87dcb8 | 1934 | if (g_verboseHealthChecks) |
4ab01344 | 1935 | infolog("Unknown exception while checking the health of backend %s", ds->getNameWithAddr()); |
773470ca | 1936 | return false; |
1937 | } | |
726ddf60 | 1938 | |
/* used by checkFileDescriptorsLimits() to estimate the number of file descriptors
   needed for TCP (backend sockets, served connections and pipes);
   presumably the maximum number of TCP client threads, as the name says */
uint64_t g_maxTCPClientThreads{10};
/* number of iterations of the maintenance thread loop, which sleeps one second
   per iteration, between two clean-ups of the packet caches */
std::atomic<uint16_t> g_cacheCleaningDelay{60};
/* used by the maintenance thread to compute the value passed to purgeExpired():
   getMaxEntries() * (100 - percentage) / 100 */
std::atomic<uint16_t> g_cacheCleaningPercentage{100};
e41f8165 | 1942 | |
9b73b71c | 1943 | void maintThread() |
886e2cf2 | 1944 | { |
519f5484 | 1945 | setThreadName("dnsdist/main"); |
886e2cf2 RG |
1946 | int interval = 1; |
1947 | size_t counter = 0; | |
5f3ea719 | 1948 | int32_t secondsToWaitLog = 0; |
886e2cf2 RG |
1949 | |
1950 | for(;;) { | |
1951 | sleep(interval); | |
1952 | ||
069e59db | 1953 | { |
1954 | std::lock_guard<std::mutex> lock(g_luamutex); | |
5f3ea719 PL |
1955 | auto f = g_lua.readVariable<boost::optional<std::function<void()> > >("maintenance"); |
1956 | if(f) { | |
1957 | try { | |
1958 | (*f)(); | |
1959 | secondsToWaitLog = 0; | |
1960 | } | |
1961 | catch(std::exception &e) { | |
1962 | if (secondsToWaitLog <= 0) { | |
1963 | infolog("Error during execution of maintenance function: %s", e.what()); | |
1964 | secondsToWaitLog = 61; | |
1965 | } | |
1966 | secondsToWaitLog -= interval; | |
1967 | } | |
1968 | } | |
069e59db | 1969 | } |
886e2cf2 RG |
1970 | |
1971 | counter++; | |
1972 | if (counter >= g_cacheCleaningDelay) { | |
c1b81381 RG |
1973 | /* keep track, for each cache, of whether we should keep |
1974 | expired entries */ | |
1975 | std::map<std::shared_ptr<DNSDistPacketCache>, bool> caches; | |
1976 | ||
1977 | /* gather all caches actually used by at least one pool, and see | |
1978 | if something prevents us from cleaning the expired entries */ | |
a9c2e4ab | 1979 | auto localPools = g_pools.getLocal(); |
a9c2e4ab | 1980 | for (const auto& entry : *localPools) { |
c1b81381 RG |
1981 | auto& pool = entry.second; |
1982 | ||
1983 | auto packetCache = pool->packetCache; | |
1984 | if (!packetCache) { | |
1985 | continue; | |
1986 | } | |
1987 | ||
1988 | auto pair = caches.insert({packetCache, false}); | |
1989 | auto& iter = pair.first; | |
1990 | /* if we need to keep stale data for this cache (ie, not clear | |
1991 | expired entries when at least one pool using this cache | |
1992 | has all its backends down) */ | |
1993 | if (packetCache->keepStaleData() && iter->second == false) { | |
1994 | /* so far all pools had at least one backend up */ | |
1995 | if (pool->countServers(true) == 0) { | |
1996 | iter->second = true; | |
1997 | } | |
886e2cf2 RG |
1998 | } |
1999 | } | |
c1b81381 RG |
2000 | |
2001 | for (auto pair : caches) { | |
2002 | /* shall we keep expired entries ? */ | |
2003 | if (pair.second == true) { | |
2004 | continue; | |
886e2cf2 | 2005 | } |
c1b81381 RG |
2006 | auto& packetCache = pair.first; |
2007 | size_t upTo = (packetCache->getMaxEntries()* (100 - g_cacheCleaningPercentage)) / 100; | |
2008 | packetCache->purgeExpired(upTo); | |
886e2cf2 RG |
2009 | } |
2010 | counter = 0; | |
2011 | } | |
2012 | ||
2013 | // ponder pruning g_dynblocks of expired entries here | |
2014 | } | |
886e2cf2 RG |
2015 | } |
2016 | ||
9b73b71c | 2017 | static void secPollThread() |
5d4e1ef8 RG |
2018 | { |
2019 | setThreadName("dnsdist/secpoll"); | |
2020 | ||
2021 | for (;;) { | |
2022 | try { | |
2023 | doSecPoll(g_secPollSuffix); | |
2024 | } | |
2025 | catch(...) { | |
2026 | } | |
2027 | sleep(g_secPollInterval); | |
2028 | } | |
2029 | } | |
2030 | ||
/* Body of the health-check thread, which never returns. Once per second it:
   - asks for one more TCP client thread if more than one connection is queued
     and the maximum number of threads has not been reached;
   - for every backend whose check interval has elapsed:
     - if its availability is 'Auto', runs upCheck() and updates the up / down
       status, taking into account the number of consecutive successes needed
       to rise and the number of failures allowed before going down;
     - updates the query load and drop rate;
     - releases the IDState slots that have been in use for too long, counting
       them as downstream timeouts.
   Note that the statistics and the timeouts are only handled when the check
   interval of the backend has elapsed, because of the 'continue' below. */
static void healthChecksThread()
{
  setThreadName("dnsdist/healthC");

  int interval = 1;

  for(;;) {
    sleep(interval);

    if(g_tcpclientthreads->getQueuedCount() > 1 && !g_tcpclientthreads->hasReachedMaxThreads())
      g_tcpclientthreads->addTCPClientThread();

    auto states = g_dstates.getLocal(); // this points to the actual shared_ptrs!
    for(auto& dss : *states) {
      /* lastCheck counts the iterations since the last check of that backend */
      if(++dss->lastCheck < dss->checkInterval)
        continue;
      dss->lastCheck = 0;
      if(dss->availability==DownstreamState::Availability::Auto) {
        bool newState=upCheck(dss);
        if (newState) {
          /* check succeeded */
          dss->currentCheckFailures = 0;

          if (!dss->upStatus) {
            /* we were marked as down */
            dss->consecutiveSuccessfulChecks++;
            if (dss->consecutiveSuccessfulChecks < dss->minRiseSuccesses) {
              /* if we need more than one successful check to rise
                 and we didn't reach the threshold yet,
                 let's stay down */
              newState = false;
            }
          }
        }
        else {
          /* check failed */
          dss->consecutiveSuccessfulChecks = 0;

          if (dss->upStatus) {
            /* we are currently up */
            dss->currentCheckFailures++;
            if (dss->currentCheckFailures < dss->maxCheckFailures) {
              /* we need more than one failure to be marked as down,
                 and we did not reach the threshold yet, let's stay up */
              newState = true;
            }
          }
        }

        if(newState != dss->upStatus) {
          warnlog("Marking downstream %s as '%s'", dss->getNameWithAddr(), newState ? "up" : "down");

          if (newState && !dss->connected) {
            /* the backend only goes up if we manage to reconnect to it */
            newState = dss->reconnect();

            /* test_and_set() makes sure the responder thread is started only once */
            if (dss->connected && !dss->threadStarted.test_and_set()) {
              dss->tid = thread(responderThread, dss);
            }
          }

          dss->upStatus = newState;
          dss->currentCheckFailures = 0;
          dss->consecutiveSuccessfulChecks = 0;
          if (g_snmpAgent && g_snmpTrapsEnabled) {
            g_snmpAgent->sendBackendStatusChangeTrap(dss);
          }
        }
      }

      /* time elapsed since the previous computation, in seconds, used to turn
         the difference between the counters into rates */
      auto delta = dss->sw.udiffAndSet()/1000000.0;
      dss->queryLoad = 1.0*(dss->queries.load() - dss->prev.queries.load())/delta;
      dss->dropRate = 1.0*(dss->reuseds.load() - dss->prev.reuseds.load())/delta;
      dss->prev.queries.store(dss->queries.load());
      dss->prev.reuseds.store(dss->reuseds.load());

      for(IDState& ids : dss->idStates) { // timeouts
        /* keep the value we read, tryMarkUnused() is given it back below so that
           we do not touch a state that has been altered in the meantime */
        int64_t usageIndicator = ids.usageIndicator;
        if(IDState::isInUse(usageIndicator) && ids.age++ > g_udpTimeout) {
          /* We mark the state as unused as soon as possible
             to limit the risk of racing with the
             responder thread.
          */
          auto oldDU = ids.du;

          if (!ids.tryMarkUnused(usageIndicator)) {
            /* this state has been altered in the meantime,
               don't go anywhere near it */
            continue;
          }
          ids.du = nullptr;
          handleDOHTimeout(oldDU);
          ids.age = 0;
          dss->reuseds++;
          --dss->outstanding;
          ++g_stats.downstreamTimeouts; // this is an 'actively' discovered timeout
          vinfolog("Had a downstream timeout from %s (%s) for query for %s|%s from %s",
                   dss->remote.toStringWithPort(), dss->name,
                   ids.qname.toLogString(), QType(ids.qtype).getName(), ids.origRemote.toStringWithPort());

          struct timespec ts;
          gettime(&ts);

          /* we never got a response, so a mostly empty header carrying only the
             original ID is inserted into the rings, along with the largest
             possible value where insertResponse() takes an unsigned int */
          struct dnsheader fake;
          memset(&fake, 0, sizeof(fake));
          fake.id = ids.origID;

          g_rings.insertResponse(ts, ids.origRemote, ids.qname, ids.qtype, std::numeric_limits<unsigned int>::max(), 0, fake, dss->remote);
        }
      }
    }
  }
}
2143 | ||
c2a42f97 | 2144 | static void bindAny(int af, int sock) |
2145 | { | |
18f8e493 | 2146 | __attribute__((unused)) int one = 1; |
c2a42f97 | 2147 | |
2148 | #ifdef IP_FREEBIND | |
2149 | if (setsockopt(sock, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) < 0) | |
2150 | warnlog("Warning: IP_FREEBIND setsockopt failed: %s", strerror(errno)); | |
2151 | #endif | |
2152 | ||
2153 | #ifdef IP_BINDANY | |
2154 | if (af == AF_INET) | |
2155 | if (setsockopt(sock, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) < 0) | |
2156 | warnlog("Warning: IP_BINDANY setsockopt failed: %s", strerror(errno)); | |
2157 | #endif | |
2158 | #ifdef IPV6_BINDANY | |
2159 | if (af == AF_INET6) | |
2160 | if (setsockopt(sock, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) < 0) | |
2161 | warnlog("Warning: IPV6_BINDANY setsockopt failed: %s", strerror(errno)); | |
2162 | #endif | |
2163 | #ifdef SO_BINDANY | |
2164 | if (setsockopt(sock, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) < 0) | |
2165 | warnlog("Warning: SO_BINDANY setsockopt failed: %s", strerror(errno)); | |
2166 | #endif | |
2167 | } | |
2168 | ||
a36ce055 RG |
2169 | static void dropGroupPrivs(gid_t gid) |
2170 | { | |
2171 | if (gid) { | |
2172 | if (setgid(gid) == 0) { | |
2173 | if (setgroups(0, NULL) < 0) { | |
2174 | warnlog("Warning: Unable to drop supplementary gids: %s", strerror(errno)); | |
2175 | } | |
2176 | } | |
2177 | else { | |
2178 | warnlog("Warning: Unable to set group ID to %d: %s", gid, strerror(errno)); | |
2179 | } | |
2180 | } | |
2181 | } | |
2182 | ||
2183 | static void dropUserPrivs(uid_t uid) | |
2184 | { | |
2185 | if(uid) { | |
2186 | if(setuid(uid) < 0) { | |
2187 | warnlog("Warning: Unable to set user ID to %d: %s", uid, strerror(errno)); | |
2188 | } | |
2189 | } | |
2190 | } | |
2191 | ||
41408d3a RG |
2192 | static void checkFileDescriptorsLimits(size_t udpBindsCount, size_t tcpBindsCount) |
2193 | { | |
2194 | /* stdin, stdout, stderr */ | |
2195 | size_t requiredFDsCount = 3; | |
a9c2e4ab | 2196 | auto backends = g_dstates.getLocal(); |
cd73ceeb RG |
2197 | /* UDP sockets to backends */ |
2198 | size_t backendUDPSocketsCount = 0; | |
a9c2e4ab | 2199 | for (const auto& backend : *backends) { |
5bdbb83d | 2200 | backendUDPSocketsCount += backend->sockets.size(); |
cd73ceeb RG |
2201 | } |
2202 | requiredFDsCount += backendUDPSocketsCount; | |
2203 | /* TCP sockets to backends */ | |
a9c2e4ab | 2204 | requiredFDsCount += (backends->size() * g_maxTCPClientThreads); |
9fcd6adb | 2205 | /* listening sockets */ |
41408d3a RG |
2206 | requiredFDsCount += udpBindsCount; |
2207 | requiredFDsCount += tcpBindsCount; | |
2208 | /* max TCP connections currently served */ | |
2209 | requiredFDsCount += g_maxTCPClientThreads; | |
f2e29d04 | 2210 | /* max pipes for communicating between TCP acceptors and client threads */ |
41408d3a | 2211 | requiredFDsCount += (g_maxTCPClientThreads * 2); |
41408d3a | 2212 | /* max TCP queued connections */ |
9fcd6adb | 2213 | requiredFDsCount += g_maxTCPQueuedConnections; |
41408d3a RG |
2214 | /* DelayPipe pipe */ |
2215 | requiredFDsCount += 2; | |
2216 | /* syslog socket */ | |
2217 | requiredFDsCount++; | |
2218 | /* webserver main socket */ | |
2219 | requiredFDsCount++; | |
2220 | /* console main socket */ | |
2221 | requiredFDsCount++; | |
2222 | /* carbon export */ | |
2223 | requiredFDsCount++; | |
2224 | /* history file */ | |
2225 | requiredFDsCount++; | |
2226 | struct rlimit rl; | |
2227 | getrlimit(RLIMIT_NOFILE, &rl); | |
9fcd6adb | 2228 | if (rl.rlim_cur <= requiredFDsCount) { |
41408d3a RG |
2229 | warnlog("Warning, this configuration can use more than %d file descriptors, web server and console connections not included, and the current limit is %d.", std::to_string(requiredFDsCount), std::to_string(rl.rlim_cur)); |
2230 | #ifdef HAVE_SYSTEMD | |
2231 | warnlog("You can increase this value by using LimitNOFILE= in the systemd unit file or ulimit."); | |
2232 | #else | |
2233 | warnlog("You can increase this value by using ulimit."); | |
2234 | #endif | |
2235 | } | |
2236 | } | |
c2a42f97 | 2237 | |
6e9fd124 RG |
2238 | static void setUpLocalBind(std::unique_ptr<ClientState>& cs) |
2239 | { | |
2240 | /* skip some warnings if there is an identical UDP context */ | |
fbf14b03 | 2241 | bool warn = cs->tcp == false || cs->tlsFrontend != nullptr || cs->dohFrontend != nullptr; |
6e9fd124 RG |
2242 | int& fd = cs->tcp == false ? cs->udpFD : cs->tcpFD; |
2243 | (void) warn; | |
2244 | ||
2245 | fd = SSocket(cs->local.sin4.sin_family, cs->tcp == false ? SOCK_DGRAM : SOCK_STREAM, 0); | |
2246 | ||
2247 | if (cs->tcp) { | |
2248 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); | |
2249 | #ifdef TCP_DEFER_ACCEPT | |
2250 | SSetsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, 1); | |
2251 | #endif | |
2252 | if (cs->fastOpenQueueSize > 0) { | |
2253 | #ifdef TCP_FASTOPEN | |
2254 | SSetsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, cs->fastOpenQueueSize); | |
2255 | #else | |
2256 | if (warn) { | |
2257 | warnlog("TCP Fast Open has been configured on local address '%s' but is not supported", cs->local.toStringWithPort()); | |
2258 | } | |
2259 | #endif | |
2260 | } | |
2261 | } | |
2262 | ||
2263 | if(cs->local.sin4.sin_family == AF_INET6) { | |
2264 | SSetsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, 1); | |
2265 | } | |
2266 | ||
2267 | bindAny(cs->local.sin4.sin_family, fd); | |
2268 | ||
2269 | if(!cs->tcp && IsAnyAddress(cs->local)) { | |
2270 | int one=1; | |
2271 | setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems | |
2272 | #ifdef IPV6_RECVPKTINFO | |
2273 | setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); | |
2274 | #endif | |
2275 | } | |
2276 | ||
2277 | if (cs->reuseport) { | |
2278 | #ifdef SO_REUSEPORT | |
2279 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1); | |
2280 | #else | |
2281 | if (warn) { | |
2282 | /* no need to warn again if configured but support is not available, we already did for UDP */ | |
2283 | warnlog("SO_REUSEPORT has been configured on local address '%s' but is not supported", cs->local.toStringWithPort()); | |
2284 | } | |
2285 | #endif | |
2286 | } | |
2287 | ||
90f9fbc0 RG |
2288 | if (!cs->tcp) { |
2289 | if (cs->local.isIPv4()) { | |
2290 | try { | |
2291 | setSocketIgnorePMTU(cs->udpFD); | |
2292 | } | |
2293 | catch(const std::exception& e) { | |
2294 | warnlog("Failed to set IP_MTU_DISCOVER on UDP server socket for local address '%s': %s", cs->local.toStringWithPort(), e.what()); | |
2295 | } | |
2296 | } | |
2297 | } | |
2298 | ||
6e9fd124 RG |
2299 | const std::string& itf = cs->interface; |
2300 | if (!itf.empty()) { | |
2301 | #ifdef SO_BINDTODEVICE | |
2302 | int res = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, itf.c_str(), itf.length()); | |
2303 | if (res != 0) { | |
2304 | warnlog("Error setting up the interface on local address '%s': %s", cs->local.toStringWithPort(), strerror(errno)); | |
2305 | } | |
2306 | #else | |
2307 | if (warn) { | |
2308 | warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", cs->local.toStringWithPort()); | |
2309 | } | |
2310 | #endif | |
2311 | } | |
2312 | ||
2313 | #ifdef HAVE_EBPF | |
2314 | if (g_defaultBPFFilter) { | |
2315 | cs->attachFilter(g_defaultBPFFilter); | |
2316 | vinfolog("Attaching default BPF Filter to %s frontend %s", (!cs->tcp ? "UDP" : "TCP"), cs->local.toStringWithPort()); | |
2317 | } | |
2318 | #endif /* HAVE_EBPF */ | |
2319 | ||
2320 | if (cs->tlsFrontend != nullptr) { | |
2321 | if (!cs->tlsFrontend->setupTLS()) { | |
2322 | errlog("Error while setting up TLS on local address '%s', exiting", cs->local.toStringWithPort()); | |
2323 | _exit(EXIT_FAILURE); | |
2324 | } | |
2325 | } | |
2326 | ||
fbf14b03 RG |
2327 | if (cs->dohFrontend != nullptr) { |
2328 | cs->dohFrontend->setup(); | |
2329 | } | |
2330 | ||
6e9fd124 RG |
2331 | SBind(fd, cs->local); |
2332 | ||
2333 | if (cs->tcp) { | |
fbf14b03 | 2334 | SListen(cs->tcpFD, SOMAXCONN); |
6e9fd124 RG |
2335 | if (cs->tlsFrontend != nullptr) { |
2336 | warnlog("Listening on %s for TLS", cs->local.toStringWithPort()); | |
2337 | } | |
fbf14b03 RG |
2338 | else if (cs->dohFrontend != nullptr) { |
2339 | warnlog("Listening on %s for DoH", cs->local.toStringWithPort()); | |
2340 | } | |
6e9fd124 RG |
2341 | else if (cs->dnscryptCtx != nullptr) { |
2342 | warnlog("Listening on %s for DNSCrypt", cs->local.toStringWithPort()); | |
2343 | } | |
2344 | else { | |
2345 | warnlog("Listening on %s", cs->local.toStringWithPort()); | |
2346 | } | |
2347 | } | |
2348 | ||
2349 | cs->ready = true; | |
2350 | } | |
2351 | ||
/* Values collected from the command line by main() */
struct
{
  std::vector<std::string> locals;  /* -l,--local: addresses to listen on */
  std::vector<std::string> remotes; /* positional arguments when not acting as a client */
  bool checkConfig = false;         /* --check-config */
  bool beClient = false;            /* -c,--client */
  bool beSupervised = false;        /* --supervised */
  std::string command;              /* -e,--execute */
  std::string config;               /* -C,--config */
  std::string uid;                  /* -u,--uid */
  std::string gid;                  /* -g,--gid */
} g_cmdLine;
520eb5a0 | 2364 | |
/* initially false, set to true by main() right before the frontends are bound */
std::atomic<bool> g_configurationDone(false);
520eb5a0 | 2366 | |
7d3ee2bb PL |
/* Print the command-line syntax and the description of every option to the
   standard output. The -k,--setkey entry is only printed when built with
   libsodium. */
static void usage()
{
  std::cout << std::endl;
  std::cout << "Syntax: dnsdist [-C,--config file] [-c,--client [IP[:PORT]]]\n"
               "[-e,--execute cmd] [-h,--help] [-l,--local addr]\n"
               "[-v,--verbose] [--check-config] [--version]\n"
               "\n"
               "-a,--acl netmask Add this netmask to the ACL\n"
               "-C,--config file Load configuration from 'file'\n"
               "-c,--client Operate as a client, connect to dnsdist. This reads\n"
               " controlSocket from your configuration file, but also\n"
               " accepts an IP:PORT argument\n";
#ifdef HAVE_LIBSODIUM
  std::cout << "-k,--setkey KEY Use KEY for encrypted communication to dnsdist. This\n"
               " is similar to setting setKey in the configuration file.\n"
               " NOTE: this will leak this key in your shell's history\n"
               " and in the systems running process list.\n";
#endif
  std::cout << "--check-config Validate the configuration file and exit. The exit-code\n"
               " reflects the validation, 0 is OK, 1 means an error.\n"
               " Any errors are printed as well.\n"
               "-e,--execute cmd Connect to dnsdist and execute 'cmd'\n"
               "-g,--gid gid Change the process group ID after binding sockets\n"
               "-h,--help Display this helpful message\n"
               "-l,--local address Listen on this local address\n"
               "--supervised Don't open a console, I'm supervised\n"
               " (use with e.g. systemd and daemontools)\n"
               "--disable-syslog Don't log to syslog, only to stdout\n"
               " (use with e.g. systemd)\n"
               "-u,--uid uid Change the process user ID after binding sockets\n"
               "-v,--verbose Enable verbose mode\n"
               "-V,--version Show dnsdist version information and exit\n";
}
2400 | ||
24d5cb00 | 2401 | int main(int argc, char** argv) |
2402 | try | |
2403 | { | |
41408d3a RG |
2404 | size_t udpBindsCount = 0; |
2405 | size_t tcpBindsCount = 0; | |
94721140 | 2406 | rl_attempted_completion_function = my_completion; |
2407 | rl_completion_append_character = 0; | |
2408 | ||
726ddf60 | 2409 | signal(SIGPIPE, SIG_IGN); |
6d01c80c | 2410 | signal(SIGCHLD, SIG_IGN); |
0ca6a67f | 2411 | openlog("dnsdist", LOG_PID|LOG_NDELAY, LOG_DAEMON); |
6d01c80c | 2412 | |
0b62ec78 | 2413 | #ifdef HAVE_LIBSODIUM |
6d01c80c | 2414 | if (sodium_init() == -1) { |
2415 | cerr<<"Unable to initialize crypto library"<<endl; | |
2416 | exit(EXIT_FAILURE); | |
2417 | } | |
7691e7df | 2418 | g_hashperturb=randombytes_uniform(0xffffffff); |
2419 | srandom(randombytes_uniform(0xffffffff)); | |
2420 | #else | |
2421 | { | |
2422 | struct timeval tv; | |
2423 | gettimeofday(&tv, 0); | |
2424 | srandom(tv.tv_sec ^ tv.tv_usec ^ getpid()); | |
2425 | g_hashperturb=random(); | |
2426 | } | |
2427 | ||
0b62ec78 | 2428 | #endif |
094b6aff | 2429 | ComboAddress clientAddress = ComboAddress(); |
11058bd6 | 2430 | g_cmdLine.config=SYSCONFDIR "/dnsdist.conf"; |
359bdba5 | 2431 | struct option longopts[]={ |
8f2d5ec3 | 2432 | {"acl", required_argument, 0, 'a'}, |
359bdba5 CH |
2433 | {"check-config", no_argument, 0, 1}, |
2434 | {"client", no_argument, 0, 'c'}, | |
7cc68f53 | 2435 | {"config", required_argument, 0, 'C'}, |
359bdba5 | 2436 | {"disable-syslog", no_argument, 0, 2}, |
7cc68f53 | 2437 | {"execute", required_argument, 0, 'e'}, |
359bdba5 CH |
2438 | {"gid", required_argument, 0, 'g'}, |
2439 | {"help", no_argument, 0, 'h'}, | |
2440 | {"local", required_argument, 0, 'l'}, | |
359bdba5 | 2441 | {"setkey", required_argument, 0, 'k'}, |
359bdba5 CH |
2442 | {"supervised", no_argument, 0, 3}, |
2443 | {"uid", required_argument, 0, 'u'}, | |
2444 | {"verbose", no_argument, 0, 'v'}, | |
2445 | {"version", no_argument, 0, 'V'}, | |
2446 | {0,0,0,0} | |
7cc68f53 | 2447 | }; |
2448 | int longindex=0; | |
8f2d5ec3 | 2449 | string optstring; |
7cc68f53 | 2450 | for(;;) { |
359bdba5 | 2451 | int c=getopt_long(argc, argv, "a:cC:e:g:hk:l:u:vV", longopts, &longindex); |
7cc68f53 | 2452 | if(c==-1) |
2453 | break; | |
2454 | switch(c) { | |
5efcfa63 PL |
2455 | case 1: |
2456 | g_cmdLine.checkConfig=true; | |
2457 | break; | |
bbfaaa6f PL |
2458 | case 2: |
2459 | g_syslog=false; | |
2460 | break; | |
b7165327 CH |
2461 | case 3: |
2462 | g_cmdLine.beSupervised=true; | |
2463 | break; | |
7cc68f53 | 2464 | case 'C': |
2465 | g_cmdLine.config=optarg; | |
2466 | break; | |
2467 | case 'c': | |
2468 | g_cmdLine.beClient=true; | |
2469 | break; | |
7cc68f53 | 2470 | case 'e': |
2471 | g_cmdLine.command=optarg; | |
2472 | break; | |
a36ce055 RG |
2473 | case 'g': |
2474 | g_cmdLine.gid=optarg; | |
2475 | break; | |
7cc68f53 | 2476 | case 'h': |
6306c282 | 2477 | cout<<"dnsdist "<<VERSION<<endl; |
7d3ee2bb | 2478 | usage(); |
7cc68f53 | 2479 | cout<<"\n"; |
2480 | exit(EXIT_SUCCESS); | |
2481 | break; | |
8f2d5ec3 | 2482 | case 'a': |
2483 | optstring=optarg; | |
2484 | g_ACL.modify([optstring](NetmaskGroup& nmg) { nmg.addMask(optstring); }); | |
2485 | break; | |
ddb14ec9 | 2486 | case 'k': |
b4b5edbd | 2487 | #ifdef HAVE_LIBSODIUM |
b5521206 | 2488 | if (B64Decode(string(optarg), g_consoleKey) < 0) { |
ddb14ec9 PL |
2489 | cerr<<"Unable to decode key '"<<optarg<<"'."<<endl; |
2490 | exit(EXIT_FAILURE); | |
2491 | } | |
b4b5edbd CH |
2492 | #else |
2493 | cerr<<"dnsdist has been built without libsodium, -k/--setkey is unsupported."<<endl; | |
2494 | exit(EXIT_FAILURE); | |
ddb14ec9 | 2495 | #endif |
b4b5edbd | 2496 | break; |
7cc68f53 | 2497 | case 'l': |
6a363878 | 2498 | g_cmdLine.locals.push_back(trim_copy(string(optarg))); |
7cc68f53 | 2499 | break; |
a36ce055 RG |
2500 | case 'u': |
2501 | g_cmdLine.uid=optarg; | |
2502 | break; | |
7cc68f53 | 2503 | case 'v': |
2504 | g_verbose=true; | |
2505 | break; | |
6306c282 | 2506 | case 'V': |
d4d796e5 PD |
2507 | #ifdef LUAJIT_VERSION |
2508 | cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<" ["<<LUAJIT_VERSION<<"])"<<endl; | |
2509 | #else | |
2510 | cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<")"<<endl; | |
2511 | #endif | |
70829d97 | 2512 | cout<<"Enabled features: "; |
a227f47d RG |
2513 | #ifdef HAVE_DNS_OVER_TLS |
2514 | cout<<"dns-over-tls("; | |
2515 | #ifdef HAVE_GNUTLS | |
3909bf10 CH |
2516 | cout<<"gnutls"; |
2517 | #ifdef HAVE_LIBSSL | |
2518 | cout<<" "; | |
2519 | #endif | |
a227f47d RG |
2520 | #endif |
2521 | #ifdef HAVE_LIBSSL | |
2522 | cout<<"openssl"; | |
2523 | #endif | |
2524 | cout<<") "; | |
2525 | #endif | |
fbf14b03 RG |
2526 | #ifdef HAVE_DNS_OVER_HTTPS |
2527 | cout<<"dns-over-https(DOH) "; | |
2528 | #endif | |
70829d97 PL |
2529 | #ifdef HAVE_DNSCRYPT |
2530 | cout<<"dnscrypt "; | |
2531 | #endif | |
0beaa5c8 RG |
2532 | #ifdef HAVE_EBPF |
2533 | cout<<"ebpf "; | |
2534 | #endif | |
82a91ddf CH |
2535 | #ifdef HAVE_FSTRM |
2536 | cout<<"fstrm "; | |
2537 | #endif | |
f4b1f1fd RG |
2538 | #ifdef HAVE_LIBCRYPTO |
2539 | cout<<"ipcipher "; | |
2540 | #endif | |
3909bf10 CH |
2541 | #ifdef HAVE_LIBSODIUM |
2542 | cout<<"libsodium "; | |
2543 | #endif | |
70829d97 PL |
2544 | #ifdef HAVE_PROTOBUF |
2545 | cout<<"protobuf "; | |
2546 | #endif | |
2547 | #ifdef HAVE_RE2 | |
2548 | cout<<"re2 "; | |
2549 | #endif | |
0beaa5c8 RG |
2550 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
2551 | cout<<"recvmmsg/sendmmsg "; | |
2552 | #endif | |
2553 | #ifdef HAVE_NET_SNMP | |
2554 | cout<<"snmp "; | |
2555 | #endif | |
70829d97 PL |
2556 | #ifdef HAVE_SYSTEMD |
2557 | cout<<"systemd"; | |
2558 | #endif | |
2559 | cout<<endl; | |
6306c282 PL |
2560 | exit(EXIT_SUCCESS); |
2561 | break; | |
7d3ee2bb PL |
2562 | case '?': |
2563 | //getopt_long printed an error message. | |
2564 | usage(); | |
2565 | exit(EXIT_FAILURE); | |
2566 | break; | |
7cc68f53 | 2567 | } |
24d5cb00 | 2568 | } |
6ab65223 | 2569 | |
7cc68f53 | 2570 | argc-=optind; |
2571 | argv+=optind; | |
2572 | for(auto p = argv; *p; ++p) { | |
094b6aff PL |
2573 | if(g_cmdLine.beClient) { |
2574 | clientAddress = ComboAddress(*p, 5199); | |
2575 | } else { | |
2576 | g_cmdLine.remotes.push_back(*p); | |
2577 | } | |
7cc68f53 | 2578 | } |
2579 | ||
a1b1a29d | 2580 | ServerPolicy leastOutstandingPol{"leastOutstanding", leastOutstanding, false}; |
cd29dcb1 | 2581 | |
e5a14b2b | 2582 | g_policy.setState(leastOutstandingPol); |
7cc68f53 | 2583 | if(g_cmdLine.beClient || !g_cmdLine.command.empty()) { |
2584 | setupLua(true, g_cmdLine.config); | |
094b6aff PL |
2585 | if (clientAddress != ComboAddress()) |
2586 | g_serverControl = clientAddress; | |
7cc68f53 | 2587 | doClient(g_serverControl, g_cmdLine.command); |
e16fd59c | 2588 | _exit(EXIT_SUCCESS); |
6d01c80c | 2589 | } |
2e72cc0e | 2590 | |
8f133915 | 2591 | auto acl = g_ACL.getCopy(); |
8f2d5ec3 | 2592 | if(acl.empty()) { |
2593 | for(auto& addr : {"127.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "169.254.0.0/16", "192.168.0.0/16", "172.16.0.0/12", "::1/128", "fc00::/7", "fe80::/10"}) | |
2594 | acl.addMask(addr); | |
2595 | g_ACL.setState(acl); | |
2596 | } | |
8f133915 | 2597 | |
b5521206 | 2598 | auto consoleACL = g_consoleACL.getCopy(); |
5ceea33e RG |
2599 | for (const auto& mask : { "127.0.0.1/8", "::1/128" }) { |
2600 | consoleACL.addMask(mask); | |
2601 | } | |
b5521206 RG |
2602 | g_consoleACL.setState(consoleACL); |
2603 | ||
5efcfa63 PL |
2604 | if (g_cmdLine.checkConfig) { |
2605 | setupLua(true, g_cmdLine.config); | |
2606 | // No exception was thrown | |
2607 | infolog("Configuration '%s' OK!", g_cmdLine.config); | |
d8c19b98 | 2608 | _exit(EXIT_SUCCESS); |
5efcfa63 PL |
2609 | } |
2610 | ||
7cc68f53 | 2611 | auto todo=setupLua(false, g_cmdLine.config); |
2e72cc0e | 2612 | |
636cc544 CHB |
2613 | auto localPools = g_pools.getCopy(); |
2614 | { | |
2615 | bool precompute = false; | |
2616 | if (g_policy.getLocal()->name == "chashed") { | |
2617 | precompute = true; | |
2618 | } else { | |
2619 | for (const auto& entry: localPools) { | |
2620 | if (entry.second->policy != nullptr && entry.second->policy->name == "chashed") { | |
2621 | precompute = true; | |
2622 | break ; | |
2623 | } | |
2624 | } | |
2625 | } | |
2626 | if (precompute) { | |
2627 | vinfolog("Pre-computing hashes for consistent hash load-balancing policy"); | |
2628 | // pre compute hashes | |
2629 | auto backends = g_dstates.getLocal(); | |
2630 | for (auto& backend: *backends) { | |
2631 | backend->hash(); | |
2632 | } | |
d58e616a CHB |
2633 | } |
2634 | } | |
2635 | ||
6e9fd124 RG |
2636 | if (!g_cmdLine.locals.empty()) { |
2637 | for (auto it = g_frontends.begin(); it != g_frontends.end(); ) { | |
fbf14b03 RG |
2638 | /* DoH, DoT and DNSCrypt frontends are separate */ |
2639 | if ((*it)->dohFrontend == nullptr && (*it)->tlsFrontend == nullptr && (*it)->dnscryptCtx == nullptr) { | |
6e9fd124 | 2640 | it = g_frontends.erase(it); |
9f67b883 | 2641 | } |
6e9fd124 RG |
2642 | else { |
2643 | ++it; | |
9f67b883 | 2644 | } |
9f67b883 RG |
2645 | } |
2646 | ||
6e9fd124 RG |
2647 | for(const auto& loc : g_cmdLine.locals) { |
2648 | /* UDP */ | |
2649 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress(loc, 53), false, false, 0, "", {}))); | |
2650 | /* TCP */ | |
2651 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress(loc, 53), true, false, 0, "", {}))); | |
87b515ed | 2652 | } |
a36ce055 RG |
2653 | } |
2654 | ||
6e9fd124 RG |
2655 | if (g_frontends.empty()) { |
2656 | /* UDP */ | |
2657 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress("127.0.0.1", 53), false, false, 0, "", {}))); | |
2658 | /* TCP */ | |
2659 | g_frontends.push_back(std::unique_ptr<ClientState>(new ClientState(ComboAddress("127.0.0.1", 53), true, false, 0, "", {}))); | |
11e1e08b | 2660 | } |
11e1e08b | 2661 | |
6e9fd124 | 2662 | g_configurationDone = true; |
a227f47d | 2663 | |
6e9fd124 RG |
2664 | for(auto& frontend : g_frontends) { |
2665 | setUpLocalBind(frontend); | |
a227f47d | 2666 | |
6e9fd124 RG |
2667 | if (frontend->tcp == false) { |
2668 | ++udpBindsCount; | |
a227f47d RG |
2669 | } |
2670 | else { | |
6e9fd124 | 2671 | ++tcpBindsCount; |
a227f47d RG |
2672 | } |
2673 | } | |
2674 | ||
b82a127e | 2675 | warnlog("dnsdist %s comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it according to the terms of the GPL version 2", VERSION); |
9c9b4998 | 2676 | |
b82a127e RG |
2677 | vector<string> vec; |
2678 | std::string acls; | |
2679 | g_ACL.getLocal()->toStringVector(&vec); | |
2680 | for(const auto& s : vec) { | |
2681 | if (!acls.empty()) | |
2682 | acls += ", "; | |
2683 | acls += s; | |
b076b34a | 2684 | } |
b82a127e | 2685 | infolog("ACL allowing queries from: %s", acls.c_str()); |
b5521206 RG |
2686 | vec.clear(); |
2687 | acls.clear(); | |
2688 | g_consoleACL.getLocal()->toStringVector(&vec); | |
2689 | for (const auto& entry : vec) { | |
2690 | if (!acls.empty()) { | |
2691 | acls += ", "; | |
2692 | } | |
2693 | acls += entry; | |
2694 | } | |
2695 | infolog("Console ACL allowing connections from: %s", acls.c_str()); | |
6d01c80c | 2696 | |
9c9b4998 RG |
2697 | #ifdef HAVE_LIBSODIUM |
2698 | if (g_consoleEnabled && g_consoleKey.empty()) { | |
2699 | warnlog("Warning, the console has been enabled via 'controlSocket()' but no key has been set with 'setKey()' so all connections will fail until a key has been set"); | |
2700 | } | |
2701 | #endif | |
2702 | ||
aac59883 RG |
2703 | uid_t newgid=0; |
2704 | gid_t newuid=0; | |
2705 | ||
2706 | if(!g_cmdLine.gid.empty()) | |
2707 | newgid = strToGID(g_cmdLine.gid.c_str()); | |
2708 | ||
2709 | if(!g_cmdLine.uid.empty()) | |
2710 | newuid = strToUID(g_cmdLine.uid.c_str()); | |
2711 | ||
2712 | dropGroupPrivs(newgid); | |
2713 | dropUserPrivs(newuid); | |
fdc3ea42 RG |
2714 | try { |
2715 | /* we might still have capabilities remaining, | |
2716 | for example if we have been started as root | |
2717 | without --uid or --gid (please don't do that) | |
2718 | or as an unprivileged user with ambient | |
2719 | capabilities like CAP_NET_BIND_SERVICE. | |
2720 | */ | |
2721 | dropCapabilities(); | |
2722 | } | |
2723 | catch(const std::exception& e) { | |
2724 | warnlog("%s", e.what()); | |
2725 | } | |
aac59883 | 2726 | |
a36ce055 RG |
2727 | /* this need to be done _after_ dropping privileges */ |
2728 | g_delay = new DelayPipe<DelayedPacket>(); | |
2729 | ||
9f4eb5cc RG |
2730 | if (g_snmpAgent) { |
2731 | g_snmpAgent->run(); | |
2732 | } | |
2733 | ||
1f7646c2 | 2734 | g_tcpclientthreads = std::unique_ptr<TCPClientCollection>(new TCPClientCollection(g_maxTCPClientThreads, g_useTCPSinglePipe)); |
a9bf3ec4 | 2735 | |
2e72cc0e | 2736 | for(auto& t : todo) |
2737 | t(); | |
2738 | ||
636cc544 | 2739 | localPools = g_pools.getCopy(); |
8f4f5186 RG |
2740 | /* create the default pool no matter what */ |
2741 | createPoolIfNotExists(localPools, ""); | |
7cc68f53 | 2742 | if(g_cmdLine.remotes.size()) { |
2743 | for(const auto& address : g_cmdLine.remotes) { | |
c9262563 | 2744 | auto ret=std::make_shared<DownstreamState>(ComboAddress(address, 53)); |
886e2cf2 | 2745 | addServerToPool(localPools, "", ret); |
5d7e6765 | 2746 | if (ret->connected && !ret->threadStarted.test_and_set()) { |
2717a92f | 2747 | ret->tid = thread(responderThread, ret); |
7565f4e6 | 2748 | } |
ecbe9133 | 2749 | g_dstates.modify([ret](servers_t& servers) { servers.push_back(ret); }); |
6d01c80c | 2750 | } |
2751 | } | |
886e2cf2 | 2752 | g_pools.setState(localPools); |
6d01c80c | 2753 | |
a9c2e4ab | 2754 | if(g_dstates.getLocal()->empty()) { |
e73ec7d3 | 2755 | errlog("No downstream servers defined: all packets will get dropped"); |
2756 | // you might define them later, but you need to know | |
2757 | } | |
2758 | ||
41408d3a RG |
2759 | checkFileDescriptorsLimits(udpBindsCount, tcpBindsCount); |
2760 | ||
e5a14b2b | 2761 | for(auto& dss : g_dstates.getCopy()) { // it is a copy, but the internal shared_ptrs are the real deal |
773470ca | 2762 | if(dss->availability==DownstreamState::Availability::Auto) { |
4ab01344 | 2763 | bool newState=upCheck(dss); |
a7940c06 | 2764 | warnlog("Marking downstream %s as '%s'", dss->getNameWithAddr(), newState ? "up" : "down"); |
773470ca | 2765 | dss->upStatus = newState; |
2766 | } | |
2767 | } | |
b076b34a | 2768 | |
6e9fd124 | 2769 | for(auto& cs : g_frontends) { |
fbf14b03 RG |
2770 | if (cs->dohFrontend != nullptr) { |
2771 | #ifdef HAVE_DNS_OVER_HTTPS | |
2772 | std::thread t1(dohThread, cs.get()); | |
2b0cb8f8 RG |
2773 | if (!cs->cpus.empty()) { |
2774 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2775 | } | |
fbf14b03 RG |
2776 | t1.detach(); |
2777 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
2778 | continue; | |
2779 | } | |
a36ce055 | 2780 | if (cs->udpFD >= 0) { |
6e9fd124 | 2781 | thread t1(udpClientThread, cs.get()); |
f0e4dcba RG |
2782 | if (!cs->cpus.empty()) { |
2783 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2784 | } | |
a36ce055 | 2785 | t1.detach(); |
652a7355 | 2786 | } |
a36ce055 | 2787 | else if (cs->tcpFD >= 0) { |
6e9fd124 | 2788 | thread t1(tcpAcceptorThread, cs.get()); |
f0e4dcba RG |
2789 | if (!cs->cpus.empty()) { |
2790 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2791 | } | |
a36ce055 | 2792 | t1.detach(); |
726ddf60 | 2793 | } |
24d5cb00 | 2794 | } |
7730131a | 2795 | |
42fae326 | 2796 | thread carbonthread(carbonDumpThread); |
2797 | carbonthread.detach(); | |
2798 | ||
3c115e0f | 2799 | thread stattid(maintThread); |
886e2cf2 | 2800 | stattid.detach(); |
6d01c80c | 2801 | |
886e2cf2 RG |
2802 | thread healththread(healthChecksThread); |
2803 | ||
5d4e1ef8 RG |
2804 | if (!g_secPollSuffix.empty()) { |
2805 | thread secpollthread(secPollThread); | |
2806 | secpollthread.detach(); | |
2807 | } | |
2808 | ||
b82a127e | 2809 | if(g_cmdLine.beSupervised) { |
6ab65223 PL |
2810 | #ifdef HAVE_SYSTEMD |
2811 | sd_notify(0, "READY=1"); | |
2812 | #endif | |
886e2cf2 | 2813 | healththread.join(); |
773470ca | 2814 | } |
6d01c80c | 2815 | else { |
886e2cf2 | 2816 | healththread.detach(); |
505ca3d1 | 2817 | doConsole(); |
3c115e0f | 2818 | } |
9cf811d1 | 2819 | _exit(EXIT_SUCCESS); |
3c115e0f | 2820 | |
6d01c80c | 2821 | } |
3f5c3f1d PD |
2822 | catch(const LuaContext::ExecutionErrorException& e) { |
2823 | try { | |
2824 | errlog("Fatal Lua error: %s", e.what()); | |
2825 | std::rethrow_if_nested(e); | |
2010ac95 RG |
2826 | } catch(const std::exception& ne) { |
2827 | errlog("Details: %s", ne.what()); | |
3f5c3f1d PD |
2828 | } |
2829 | catch(PDNSException &ae) | |
2830 | { | |
2831 | errlog("Fatal pdns error: %s", ae.reason); | |
2832 | } | |
2833 | _exit(EXIT_FAILURE); | |
2834 | } | |
24d5cb00 | 2835 | catch(std::exception &e) |
2836 | { | |
6d01c80c | 2837 | errlog("Fatal error: %s", e.what()); |
4a966472 | 2838 | _exit(EXIT_FAILURE); |
24d5cb00 | 2839 | } |
3f81d239 | 2840 | catch(PDNSException &ae) |
7730131a | 2841 | { |
6d01c80c | 2842 | errlog("Fatal pdns error: %s", ae.reason); |
4a966472 | 2843 | _exit(EXIT_FAILURE); |
7730131a | 2844 | } |
eb0335ff MC |
2845 | |
2846 | uint64_t getLatencyCount(const std::string&) | |
2847 | { | |
2848 | return g_stats.responses + g_stats.selfAnswered + g_stats.cacheHits; | |
2849 | } |