]>
Commit | Line | Data |
---|---|---|
24d5cb00 | 1 | /* |
12471842 PL |
2 | * This file is part of PowerDNS or dnsdist. |
3 | * Copyright -- PowerDNS.COM B.V. and its contributors | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of version 2 of the GNU General Public License as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * In addition, for the avoidance of any doubt, permission is granted to | |
10 | * link this program with OpenSSL and to (re)distribute the binaries | |
11 | * produced as the result of such linking. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
df111b53 | 22 | #include "dnsdist.hh" |
ca404e94 | 23 | #include "dnsdist-ecs.hh" |
24d5cb00 | 24 | #include "sstuff.hh" |
25 | #include "misc.hh" | |
24d5cb00 | 26 | #include <netinet/tcp.h> |
2c5db3f7 | 27 | #include <limits> |
64e4ebb4 | 28 | #include "dolog.hh" |
424bdfb1 PD |
29 | |
30 | #if defined (__OpenBSD__) | |
31 | #include <readline/readline.h> | |
32 | #else | |
33 | #include <editline/readline.h> | |
34 | #endif | |
35 | ||
3c115e0f | 36 | #include "dnsname.hh" |
773470ca | 37 | #include "dnswriter.hh" |
6d01c80c | 38 | #include "base64.hh" |
3f25bce6 | 39 | #include <fstream> |
7b3865cd | 40 | #include "delaypipe.hh" |
7cc68f53 | 41 | #include <unistd.h> |
6d01c80c | 42 | #include "sodcrypto.hh" |
6bb38cd6 | 43 | #include "dnsdist-lua.hh" |
a36ce055 RG |
44 | #include <grp.h> |
45 | #include <pwd.h> | |
0e41337b | 46 | #include "lock.hh" |
7cc68f53 | 47 | #include <getopt.h> |
41408d3a | 48 | #include <sys/resource.h> |
886e2cf2 | 49 | #include "dnsdist-cache.hh" |
85c7ca75 | 50 | #include "gettime.hh" |
6bb38cd6 | 51 | #include "ednsoptions.hh" |
3f25bce6 | 52 | |
6ab65223 PL |
53 | #ifdef HAVE_SYSTEMD |
54 | #include <systemd/sd-daemon.h> | |
55 | #endif | |
56 | ||
0beaa5c8 RG |
57 | #ifdef HAVE_PROTOBUF |
58 | thread_local boost::uuids::random_generator t_uuidGenerator; | |
59 | #endif | |
60 | ||
a40df301 | 61 | /* Known sins: |
e48090d1 | 62 | |
d12cea01 | 63 | Receiver is currently single threaded |
e5a14b2b | 64 | not *that* bad actually, but now that we are thread safe, might want to scale |
a40df301 | 65 | */ |
24d5cb00 | 66 | |
0940e4eb | 67 | /* the Rulaction plan |
68 | Set of Rules, if one matches, it leads to an Action | |
69 | Both rules and actions could conceivably be Lua based. | |
70 | On the C++ side, both could be inherited from a class Rule and a class Action, | |
71 | on the Lua side we can't do that. */ | |
72 | ||
64e4ebb4 | 73 | using std::atomic; |
74 | using std::thread; | |
// Process-wide configuration and state. The definitions below are read and
// written from other parts of dnsdist that are not visible in this excerpt, so
// the notes are limited to what this file shows.
// g_verbose: presumably gates vinfolog() output -- TODO confirm in dolog.hh
7730131a | 75 | bool g_verbose; |
b065f701 | 76 |
e48090d1 | 77 | struct DNSDistStats g_stats; |
// g_maxOutstanding: number of IDState slots allocated per downstream server
// (the DownstreamState constructor resizes idStates to this value).
13fe35db | 78 | uint16_t g_maxOutstanding{10240}; |
b076b34a | 79 | bool g_console; |
9e87dcb8 | 80 | bool g_verboseHealthChecks{false}; |
1ea747c0 | 81 | uint32_t g_staleCacheEntriesTTL{0}; |
bbfaaa6f | 82 | bool g_syslog{true}; |
cffde2fd | 83 |
84 | GlobalStateHolder<NetmaskGroup> g_ACL; | |
c9262563 | 85 | string g_outputBuffer; |
a227f47d | 86 |
// Listening endpoints; the meaning of each tuple field is defined by the code
// that fills and consumes these vectors (not visible here).
f0e4dcba | 87 | vector<std::tuple<ComboAddress, bool, bool, int, string, std::set<int>>> g_locals; |
a227f47d | 88 | std::vector<std::shared_ptr<TLSFrontend>> g_tlslocals; |
11e1e08b | 89 | #ifdef HAVE_DNSCRYPT |
f0e4dcba | 90 | std::vector<std::tuple<ComboAddress,DnsCryptContext,bool, int, string, std::set<int>>> g_dnsCryptLocals; |
11e1e08b | 91 | #endif |
87b515ed RG |
92 | #ifdef HAVE_EBPF |
93 | shared_ptr<BPFFilter> g_defaultBPFFilter; | |
8429ad04 | 94 | std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters; |
87b515ed | 95 | #endif /* HAVE_EBPF */ |
963bef8d | 96 | vector<ClientState *> g_frontends; |
// g_pools: named server pools, see createPoolIfNotExists() / addServerToPool().
886e2cf2 | 97 | GlobalStateHolder<pools_t> g_pools; |
0beaa5c8 | 98 | size_t g_udpVectorSize{1}; |
7c0860e1 | 99 |
9f4eb5cc RG |
100 | bool g_snmpEnabled{false}; |
101 | bool g_snmpTrapsEnabled{false}; | |
102 | DNSDistSNMPAgent* g_snmpAgent{nullptr}; | |
103 | ||
726ddf60 | 104 | /* UDP: the grand design. Per socket we listen on for incoming queries there is one thread. |
7730131a | 105 | Then we have a bunch of connected sockets for talking to downstream servers. |
106 | We send directly to those sockets. | |
24d5cb00 | 107 | |
7730131a | 108 | For the return path, per downstream server we have a thread that listens to responses. |
24d5cb00 | 109 | |
7730131a | 110 | Per socket there is an array of 2^16 states, when we send out a packet downstream, we note |
111 | there the original requestor and the original id. The new ID is the offset in the array. | |
112 | ||
113 | When an answer comes in on a socket, we look up the offset by the id, and lob it to the | |
114 | original requestor. | |
115 | ||
116 | IDs are assigned by atomic increments of the socket offset. | |
117 | */ | |
118 | ||
3f25bce6 | 119 | /* for our load balancing, we want to support: |
120 | Round-robin | |
121 | Round-robin with basic uptime checks | |
122 | Send to least loaded server (least outstanding) | |
123 | Send it to the first server that is not overloaded | |
839f3021 | 124 | Hashed weighted random |
3f25bce6 | 125 | */ |
126 | ||
773470ca | 127 | /* Idea: |
128 | Multiple server groups, by default we load balance to the group with no name. | |
129 | Each instance is either 'up', 'down' or 'auto', where 'auto' means that dnsdist | |
130 | determines if the instance is up or not. Auto should be the default and very very good. | |
131 | ||
132 | In addition, to each instance you can attach a QPS object with rate & burst, which will optionally | |
133 | limit the amount of queries we send there. | |
134 | ||
135 | If all downstreams are over QPS, we pick the fastest server */ | |
136 | ||
// Rule/action tables: each entry pairs a DNSRule with the action to apply when
// it matches (see "the Rulaction plan" above).
0940e4eb | 137 | GlobalStateHolder<vector<pair<std::shared_ptr<DNSRule>, std::shared_ptr<DNSAction> > > > g_rulactions; |
// g_resprulactions is the table the UDP responderThread hands to processResponse().
8146444b | 138 | GlobalStateHolder<vector<pair<std::shared_ptr<DNSRule>, std::shared_ptr<DNSResponseAction> > > > g_resprulactions; |
cf48b0ce | 139 | GlobalStateHolder<vector<pair<std::shared_ptr<DNSRule>, std::shared_ptr<DNSResponseAction> > > > g_cachehitresprulactions; |
// g_rings: responderThread appends one entry per relayed response to
// g_rings.respRing while holding g_rings.respMutex.
df111b53 | 140 | Rings g_rings; |
786e4d8c | 141 | QueryCount g_qcount; |
7c0860e1 | 142 |
ecbe9133 | 143 | GlobalStateHolder<servers_t> g_dstates; |
78ffa782 | 144 | GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG; |
71c94675 | 145 | GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT; |
dd46e5e3 | 146 | DNSAction::Action g_dynBlockAction = DNSAction::Action::Drop; |
// Timeouts; presumably expressed in seconds -- TODO confirm against the users
// of these variables.
3f6d07a4 RG |
147 | int g_tcpRecvTimeout{2}; |
148 | int g_tcpSendTimeout{2}; | |
e0b5e49d | 149 | int g_udpTimeout{2}; |
3f6d07a4 | 150 |
26a3cdb7 | 151 | bool g_servFailOnNoPolicy{false}; |
// g_truncateTC: when set, responderThread passes responses carrying the TC bit
// through truncateTC().
e72fbfc4 | 152 | bool g_truncateTC{false}; |
// g_fixupCase: when set, fixUpResponse() overwrites the name in the response's
// question section with the name from the original query.
b29edbee | 153 | bool g_fixupCase{0}; |
0beaa5c8 RG |
154 |
155 | static const size_t s_udpIncomingBufferSize{1500}; | |
156 | ||
b50ee135 | 157 | static void truncateTC(const char* packet, uint16_t* len) |
6ad8b29a | 158 | try |
159 | { | |
160 | unsigned int consumed; | |
ca404e94 | 161 | DNSName qname(packet, *len, sizeof(dnsheader), false, 0, 0, &consumed); |
a683e8bd | 162 | *len=(uint16_t) (sizeof(dnsheader)+consumed+DNS_TYPE_SIZE+DNS_CLASS_SIZE); |
6ad8b29a | 163 | struct dnsheader* dh =(struct dnsheader*)packet; |
164 | dh->ancount = dh->arcount = dh->nscount=0; | |
165 | } | |
166 | catch(...) | |
167 | { | |
168 | g_stats.truncFail++; | |
169 | } | |
170 | ||
7b3865cd | 171 | struct DelayedPacket |
172 | { | |
173 | int fd; | |
174 | string packet; | |
175 | ComboAddress destination; | |
68f3eb4d | 176 | ComboAddress origDest; |
7b3865cd | 177 | void operator()() |
178 | { | |
20b019fa RG |
179 | ssize_t res; |
180 | if(origDest.sin4.sin_family == 0) { | |
181 | res = sendto(fd, packet.c_str(), packet.size(), 0, (struct sockaddr*)&destination, destination.getSocklen()); | |
182 | } | |
183 | else { | |
184 | res = sendfromto(fd, packet.c_str(), packet.size(), 0, origDest, destination); | |
185 | } | |
186 | if (res == -1) { | |
187 | int err = errno; | |
188 | vinfolog("Error sending delayed response to %s: %s", destination.toStringWithPort(), strerror(err)); | |
189 | } | |
7b3865cd | 190 | } |
191 | }; | |
192 | ||
a36ce055 | 193 | DelayPipe<DelayedPacket> * g_delay = 0; |
7b3865cd | 194 | |
daacd477 | 195 | static void doLatencyAverages(double udiff) |
196 | { | |
197 | auto doAvg = [](double& var, double n, double weight) { | |
198 | var = (weight -1) * var/weight + n/weight; | |
199 | }; | |
200 | ||
201 | doAvg(g_stats.latencyAvg100, udiff, 100); | |
202 | doAvg(g_stats.latencyAvg1000, udiff, 1000); | |
203 | doAvg(g_stats.latencyAvg10000, udiff, 10000); | |
204 | doAvg(g_stats.latencyAvg1000000, udiff, 1000000); | |
205 | } | |
ca404e94 | 206 | |
fcffc585 RG |
207 | bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote) |
208 | { | |
209 | uint16_t rqtype, rqclass; | |
210 | unsigned int consumed; | |
211 | DNSName rqname; | |
212 | const struct dnsheader* dh = (struct dnsheader*) response; | |
213 | ||
214 | if (responseLen < sizeof(dnsheader)) { | |
215 | return false; | |
216 | } | |
217 | ||
c8c3d4e4 RG |
218 | if (dh->qdcount == 0) { |
219 | if (dh->rcode != RCode::NoError && dh->rcode != RCode::NXDomain) { | |
220 | return true; | |
221 | } | |
222 | else { | |
223 | g_stats.nonCompliantResponses++; | |
224 | return false; | |
225 | } | |
226 | } | |
227 | ||
fcffc585 RG |
228 | try { |
229 | rqname=DNSName(response, responseLen, sizeof(dnsheader), false, &rqtype, &rqclass, &consumed); | |
230 | } | |
231 | catch(std::exception& e) { | |
232 | if(responseLen > (ssize_t)sizeof(dnsheader)) | |
233 | infolog("Backend %s sent us a response with id %d that did not parse: %s", remote.toStringWithPort(), ntohs(dh->id), e.what()); | |
234 | g_stats.nonCompliantResponses++; | |
235 | return false; | |
236 | } | |
237 | ||
238 | if (rqtype != qtype || rqclass != qclass || rqname != qname) { | |
239 | return false; | |
240 | } | |
241 | ||
242 | return true; | |
243 | } | |
244 | ||
0f72fd5c | 245 | void restoreFlags(struct dnsheader* dh, uint16_t origFlags) |
fcffc585 RG |
246 | { |
247 | static const uint16_t rdMask = 1 << FLAGS_RD_OFFSET; | |
248 | static const uint16_t cdMask = 1 << FLAGS_CD_OFFSET; | |
249 | static const uint16_t restoreFlagsMask = UINT16_MAX & ~(rdMask | cdMask); | |
0f72fd5c RG |
250 | uint16_t * flags = getFlagsFromDNSHeader(dh); |
251 | /* clear the flags we are about to restore */ | |
252 | *flags &= restoreFlagsMask; | |
253 | /* only keep the flags we want to restore */ | |
254 | origFlags &= ~restoreFlagsMask; | |
255 | /* set the saved flags as they were */ | |
256 | *flags |= origFlags; | |
257 | } | |
258 | ||
ff73f02b | 259 | bool fixUpResponse(char** response, uint16_t* responseLen, size_t* responseSize, const DNSName& qname, uint16_t origFlags, bool ednsAdded, bool ecsAdded, std::vector<uint8_t>& rewrittenResponse, uint16_t addRoom) |
0f72fd5c | 260 | { |
fcffc585 RG |
261 | struct dnsheader* dh = (struct dnsheader*) *response; |
262 | ||
263 | if (*responseLen < sizeof(dnsheader)) { | |
264 | return false; | |
265 | } | |
266 | ||
c8c3d4e4 RG |
267 | restoreFlags(dh, origFlags); |
268 | ||
269 | if (*responseLen == sizeof(dnsheader)) { | |
270 | return true; | |
271 | } | |
272 | ||
fcffc585 RG |
273 | if(g_fixupCase) { |
274 | string realname = qname.toDNSString(); | |
275 | if (*responseLen >= (sizeof(dnsheader) + realname.length())) { | |
276 | memcpy(*response + sizeof(dnsheader), realname.c_str(), realname.length()); | |
277 | } | |
278 | } | |
279 | ||
ff73f02b RG |
280 | if (ednsAdded || ecsAdded) { |
281 | char * optStart = NULL; | |
fcffc585 RG |
282 | size_t optLen = 0; |
283 | bool last = false; | |
284 | ||
285 | int res = locateEDNSOptRR(*response, *responseLen, &optStart, &optLen, &last); | |
286 | ||
287 | if (res == 0) { | |
ff73f02b RG |
288 | if (ednsAdded) { |
289 | /* we added the entire OPT RR, | |
290 | therefore we need to remove it entirely */ | |
291 | if (last) { | |
292 | /* simply remove the last AR */ | |
293 | *responseLen -= optLen; | |
294 | uint16_t arcount = ntohs(dh->arcount); | |
295 | arcount--; | |
296 | dh->arcount = htons(arcount); | |
297 | } | |
298 | else { | |
299 | /* Removing an intermediary RR could lead to compression error */ | |
300 | if (rewriteResponseWithoutEDNS(*response, *responseLen, rewrittenResponse) == 0) { | |
301 | *responseLen = rewrittenResponse.size(); | |
302 | if (addRoom && (UINT16_MAX - *responseLen) > addRoom) { | |
303 | rewrittenResponse.reserve(*responseLen + addRoom); | |
304 | } | |
305 | *responseSize = rewrittenResponse.capacity(); | |
306 | *response = reinterpret_cast<char*>(rewrittenResponse.data()); | |
307 | } | |
308 | else { | |
309 | warnlog("Error rewriting content"); | |
310 | } | |
311 | } | |
fcffc585 RG |
312 | } |
313 | else { | |
ff73f02b RG |
314 | /* the OPT RR was already present, but without ECS, |
315 | we need to remove the ECS option if any */ | |
316 | if (last) { | |
317 | /* nothing after the OPT RR, we can simply remove the | |
318 | ECS option */ | |
319 | size_t existingOptLen = optLen; | |
320 | removeEDNSOptionFromOPT(optStart, &optLen, EDNSOptionCode::ECS); | |
321 | *responseLen -= (existingOptLen - optLen); | |
fcffc585 RG |
322 | } |
323 | else { | |
ff73f02b RG |
324 | /* Removing an intermediary RR could lead to compression error */ |
325 | if (rewriteResponseWithoutEDNSOption(*response, *responseLen, EDNSOptionCode::ECS, rewrittenResponse) == 0) { | |
326 | *responseLen = rewrittenResponse.size(); | |
327 | if (addRoom && (UINT16_MAX - *responseLen) > addRoom) { | |
328 | rewrittenResponse.reserve(*responseLen + addRoom); | |
329 | } | |
330 | *responseSize = rewrittenResponse.capacity(); | |
331 | *response = reinterpret_cast<char*>(rewrittenResponse.data()); | |
332 | } | |
333 | else { | |
334 | warnlog("Error rewriting content"); | |
335 | } | |
fcffc585 RG |
336 | } |
337 | } | |
338 | } | |
339 | } | |
340 | ||
341 | return true; | |
342 | } | |
343 | ||
fcffc585 | 344 | #ifdef HAVE_DNSCRYPT |
57847d65 | 345 | bool encryptResponse(char* response, uint16_t* responseLen, size_t responseSize, bool tcp, std::shared_ptr<DnsCryptQuery> dnsCryptQuery, dnsheader** dh, dnsheader* dhCopy) |
fcffc585 | 346 | { |
0f72fd5c RG |
347 | if (dnsCryptQuery) { |
348 | uint16_t encryptedResponseLen = 0; | |
57847d65 RG |
349 | |
350 | /* save the original header before encrypting it in place */ | |
351 | if (dh != nullptr && *dh != nullptr && dhCopy != nullptr) { | |
352 | memcpy(dhCopy, *dh, sizeof(dnsheader)); | |
353 | *dh = dhCopy; | |
354 | } | |
355 | ||
0f72fd5c | 356 | int res = dnsCryptQuery->ctx->encryptResponse(response, *responseLen, responseSize, dnsCryptQuery, tcp, &encryptedResponseLen); |
fcffc585 | 357 | if (res == 0) { |
0f72fd5c | 358 | *responseLen = encryptedResponseLen; |
fcffc585 RG |
359 | } else { |
360 | /* dropping response */ | |
361 | vinfolog("Error encrypting the response, dropping."); | |
362 | return false; | |
363 | } | |
364 | } | |
0f72fd5c RG |
365 | return true; |
366 | } | |
fcffc585 RG |
367 | #endif |
368 | ||
0f72fd5c RG |
369 | static bool sendUDPResponse(int origFD, char* response, uint16_t responseLen, int delayMsec, const ComboAddress& origDest, const ComboAddress& origRemote) |
370 | { | |
fcffc585 RG |
371 | if(delayMsec && g_delay) { |
372 | DelayedPacket dp{origFD, string(response,responseLen), origRemote, origDest}; | |
373 | g_delay->submit(dp, delayMsec); | |
374 | } | |
375 | else { | |
20b019fa RG |
376 | ssize_t res; |
377 | if(origDest.sin4.sin_family == 0) { | |
378 | res = sendto(origFD, response, responseLen, 0, (struct sockaddr*)&origRemote, origRemote.getSocklen()); | |
379 | } | |
380 | else { | |
381 | res = sendfromto(origFD, response, responseLen, 0, origDest, origRemote); | |
382 | } | |
383 | if (res == -1) { | |
384 | int err = errno; | |
385 | vinfolog("Error sending response to %s: %s", origRemote.toStringWithPort(), strerror(err)); | |
386 | } | |
fcffc585 RG |
387 | } |
388 | ||
389 | return true; | |
390 | } | |
391 | ||
// Entry point of the per-backend UDP responder thread.
// Loops forever: reads one datagram from state->fd, finds the IDState slot
// designated by the id of the DNS header, checks that the response matches the
// recorded question, applies the response rules, undoes our own changes to the
// query (fixUpResponse), optionally stores the response in the packet cache,
// relays it to the original client and updates statistics and the response ring.
// Exceptions raised while handling one response are logged and the loop goes on;
// the function only returns (0) when an exception escapes the loop itself.
4632045b | 392 | // listens on a dedicated socket, lobs answers from downstream servers to original requestors |
3c115e0f | 393 | void* responderThread(std::shared_ptr<DownstreamState> state) |
66d1c0bf | 394 | try { |
d8c19b98 | 395 | auto localRespRulactions = g_resprulactions.getLocal(); |
11e1e08b RG |
396 | #ifdef HAVE_DNSCRYPT |
397 | char packet[4096 + DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE]; | |
57847d65 RG |
398 | /* when the answer is encrypted in place, we need to get a copy |
399 | of the original header before encryption to fill the ring buffer */ | |
400 | dnsheader dhCopy; | |
11e1e08b | 401 | #else |
64e4ebb4 | 402 | char packet[4096]; |
11e1e08b | 403 | #endif |
b50ee135 | 404 | static_assert(sizeof(packet) <= UINT16_MAX, "Packet size should fit in a uint16_t"); |
// scratch buffer handed to fixUpResponse(), cleared at the end of each fully
// processed response
ca404e94 | 405 | vector<uint8_t> rewrittenResponse; |
7267213a | 406 |
// kept outside of the loop's try block so the catch handler can log it
66d1c0bf | 407 | uint16_t queryId = 0; |
7730131a | 408 | for(;;) { |
// dh is reset to the receive buffer on every iteration because
// encryptResponse() may redirect it to dhCopy
57847d65 | 409 | dnsheader* dh = reinterpret_cast<struct dnsheader*>(packet); |
// true between the decrement of state->outstanding and the release of the
// IDState slot, see the catch handler below
df560083 | 410 | bool outstandingDecreased = false; |
66d1c0bf RG |
411 | try { |
412 | ssize_t got = recv(state->fd, packet, sizeof(packet), 0); | |
413 | char * response = packet; | |
414 | size_t responseSize = sizeof(packet); | |
ca404e94 | 415 |
// this also skips recv() failures, since -1 is smaller than a header
66d1c0bf RG |
416 | if (got < (ssize_t) sizeof(dnsheader)) |
417 | continue; | |
4f380af3 | 418 |
66d1c0bf RG |
419 | uint16_t responseLen = (uint16_t) got; |
// the id is used as read from the wire (no ntohs) as the index of the slot
420 | queryId = dh->id; | |
b50ee135 | 421 |
66d1c0bf RG |
422 | if(queryId >= state->idStates.size()) |
423 | continue; | |
4f380af3 | 424 |
66d1c0bf RG |
425 | IDState* ids = &state->idStates[queryId]; |
426 | int origFD = ids->origFD; | |
c9ba8478 | 427 |
66d1c0bf RG |
428 | if(origFD < 0) // duplicate |
429 | continue; | |
7267213a | 430 |
66d1c0bf RG |
431 | /* setting age to 0 to prevent the maintainer thread from |
432 | cleaning this IDS while we process the response. | |
433 | We have already a copy of the origFD, so it would | |
434 | mostly mess up the outstanding counter. | |
435 | */ | |
436 | ids->age = 0; | |
fcffc585 | 437 |
66d1c0bf RG |
438 | if (!responseContentMatches(response, responseLen, ids->qname, ids->qtype, ids->qclass, state->remote)) { |
439 | continue; | |
440 | } | |
7267213a | 441 |
66d1c0bf | 442 | --state->outstanding; // you'd think an attacker could game this, but we're using connected socket |
df560083 | 443 | outstandingDecreased = true; |
2c5db3f7 | 444 |
66d1c0bf RG |
445 | if(dh->tc && g_truncateTC) { |
446 | truncateTC(response, &responseLen); | |
447 | } | |
aeb36780 | 448 |
// put back the id the client used in its query
66d1c0bf | 449 | dh->id = ids->origID; |
ca404e94 | 450 |
66d1c0bf RG |
451 | uint16_t addRoom = 0; |
452 | DNSResponse dr(&ids->qname, ids->qtype, ids->qclass, &ids->origDest, &ids->origRemote, dh, sizeof(packet), responseLen, false, &ids->sentTime.d_start); | |
d8c19b98 | 453 | #ifdef HAVE_PROTOBUF |
66d1c0bf | 454 | dr.uniqueId = ids->uniqueId; |
d8c19b98 | 455 | #endif |
a76b0d63 RG |
456 | dr.qTag = ids->qTag; |
457 |
// NOTE(review): the 'continue' statements from here on leave the loop body with
// state->outstanding already decremented and ids->origFD still set -- confirm
// that this is the intended accounting for dropped responses.
66d1c0bf RG |
458 | if (!processResponse(localRespRulactions, dr, &ids->delayMsec)) { |
459 | continue; | |
460 | } | |
d8c19b98 | 461 |
11e1e08b | 462 | #ifdef HAVE_DNSCRYPT |
66d1c0bf RG |
463 | if (ids->dnsCryptQuery) { |
464 | addRoom = DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE; | |
465 | } | |
11e1e08b | 466 | #endif |
// after this call 'response' may point into rewrittenResponse instead of packet
66d1c0bf RG |
467 | if (!fixUpResponse(&response, &responseLen, &responseSize, ids->qname, ids->origFlags, ids->ednsAdded, ids->ecsAdded, rewrittenResponse, addRoom)) { |
468 | continue; | |
469 | } | |
ca404e94 | 470 |
// the clear-text response is cached, before any encryption
66d1c0bf | 471 | if (ids->packetCache && !ids->skipCache) { |
2714396e | 472 | ids->packetCache->insert(ids->cacheKey, ids->qname, ids->qtype, ids->qclass, response, responseLen, false, dh->rcode); |
66d1c0bf | 473 | } |
886e2cf2 | 474 |
b5b93e0b | 475 | if (ids->cs && !ids->cs->muted) { |
11e1e08b | 476 | #ifdef HAVE_DNSCRYPT |
57847d65 | 477 | if (!encryptResponse(response, &responseLen, responseSize, false, ids->dnsCryptQuery, &dh, &dhCopy)) { |
b5b93e0b RG |
478 | continue; |
479 | } | |
11e1e08b | 480 | #endif |
b5b93e0b RG |
481 |
482 | ComboAddress empty; | |
483 | empty.sin4.sin_family = 0; | |
484 | /* if ids->destHarvested is false, origDest holds the listening address. | |
485 | We don't want to use that as a source since it could be 0.0.0.0 for example. */ | |
486 | sendUDPResponse(origFD, response, responseLen, ids->delayMsec, ids->destHarvested ? ids->origDest : empty, ids->origRemote); | |
487 | } | |
66d1c0bf RG |
488 |
489 | g_stats.responses++; | |
490 |
491 | double udiff = ids->sentTime.udiff(); | |
492 | vinfolog("Got answer from %s, relayed to %s, took %f usec", state->remote.toStringWithPort(), ids->origRemote.toStringWithPort(), udiff); | |
493 |
// record the response in the ring buffer, under its mutex; *dh is the header
// as it was before encryption
494 | { | |
495 | struct timespec ts; | |
496 | gettime(&ts); | |
497 | std::lock_guard<std::mutex> lock(g_rings.respMutex); | |
498 | g_rings.respRing.push_back({ts, ids->origRemote, ids->qname, ids->qtype, (unsigned int)udiff, (unsigned int)got, *dh, state->remote}); | |
499 | } | |
500 |
501 | if(dh->rcode == RCode::ServFail) | |
502 | g_stats.servfailResponses++; | |
// running average of this backend's latency, the new sample weighing 1/128
503 | state->latencyUsec = (127.0 * state->latencyUsec / 128.0) + udiff/128.0; | |
504 |
// latency histogram, udiff being compared against thresholds of 1000, 10000,
// 50000, 100000 and 1000000
505 | if(udiff < 1000) g_stats.latency0_1++; | |
506 | else if(udiff < 10000) g_stats.latency1_10++; | |
507 | else if(udiff < 50000) g_stats.latency10_50++; | |
508 | else if(udiff < 100000) g_stats.latency50_100++; | |
509 | else if(udiff < 1000000) g_stats.latency100_1000++; | |
510 | else g_stats.latencySlow++; | |
42fae326 | 511 |
66d1c0bf | 512 | doLatencyAverages(udiff); |
fcadd56e | 513 |
// release the slot, unless its origFD changed while we were working
// (presumably because the slot got reused in the meantime -- TODO confirm)
66d1c0bf | 514 | if (ids->origFD == origFD) { |
11e1e08b | 515 | #ifdef HAVE_DNSCRYPT |
0beaa5c8 | 516 | ids->dnsCryptQuery = nullptr; |
11e1e08b | 517 | #endif |
66d1c0bf | 518 | ids->origFD = -1; |
df560083 | 519 | outstandingDecreased = false; |
66d1c0bf | 520 | } |
ca404e94 | 521 |
66d1c0bf RG |
522 | rewrittenResponse.clear(); |
523 | } | |
df560083 | 524 | catch(const std::exception& e){ |
66d1c0bf | 525 | vinfolog("Got an error in UDP responder thread while parsing a response from %s, id %d: %s", state->remote.toStringWithPort(), queryId, e.what()); |
df560083 RG |
526 | if (outstandingDecreased) { |
527 | /* so an exception was raised after we decreased the outstanding queries counter, | |
528 | but before we could set ids->origFD to -1 (because we also set outstandingDecreased | |
fe0b6bba | 529 | to false then), meaning the IDS is still considered active and we will decrease the |
df560083 RG |
530 | counter again on a duplicate, or simply while reaping downstream timeouts, so let's |
531 | increase it back. */ | |
532 | state->outstanding++; | |
533 | } | |
66d1c0bf | 534 | } |
7730131a | 535 | } |
536 | return 0; | |
537 | } | |
// Handlers of the function-level try block: anything caught here ends the thread.
66d1c0bf RG |
538 | catch(const std::exception& e) |
539 | { | |
540 | errlog("UDP responder thread died because of exception: %s", e.what()); | |
541 | return 0; | |
542 | } | |
543 | catch(const PDNSException& e) | |
544 | { | |
545 | errlog("UDP responder thread died because of PowerDNS exception: %s", e.reason); | |
546 | return 0; | |
547 | } | |
548 | catch(...) | |
549 | { | |
550 | errlog("UDP responder thread died because of an exception: %s", "unknown"); | |
551 | return 0; | |
552 | } | |
24d5cb00 | 553 | |
b58f08e5 | 554 | void DownstreamState::reconnect() |
3c115e0f | 555 | { |
b58f08e5 RG |
556 | connected = false; |
557 | if (fd != -1) { | |
558 | /* shutdown() is needed to wake up recv() in the responderThread */ | |
559 | shutdown(fd, SHUT_RDWR); | |
560 | close(fd); | |
561 | fd = -1; | |
562 | } | |
f99e3aaf RG |
563 | if (!IsAnyAddress(remote)) { |
564 | fd = SSocket(remote.sin4.sin_family, SOCK_DGRAM, 0); | |
565 | if (!IsAnyAddress(sourceAddr)) { | |
566 | SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); | |
567 | SBind(fd, sourceAddr); | |
568 | } | |
7565f4e6 RG |
569 | try { |
570 | SConnect(fd, remote); | |
571 | connected = true; | |
572 | } | |
573 | catch(const std::runtime_error& error) { | |
574 | infolog("Error connecting to new server with address %s: %s", remote.toStringWithPort(), error.what()); | |
575 | } | |
b58f08e5 RG |
576 | } |
577 | } | |
578 | ||
579 | DownstreamState::DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf_): remote(remote_), sourceAddr(sourceAddr_), sourceItf(sourceItf_) | |
580 | { | |
581 | if (!IsAnyAddress(remote)) { | |
582 | reconnect(); | |
f99e3aaf RG |
583 | idStates.resize(g_maxOutstanding); |
584 | sw.start(); | |
585 | infolog("Added downstream server %s", remote.toStringWithPort()); | |
fbe2a2e0 | 586 | } |
3c115e0f | 587 | } |
588 | ||
// g_luamutex: presumably serializes access to the Lua context below -- TODO
// confirm against the code taking this lock.
773470ca | 589 | std::mutex g_luamutex; |
3f25bce6 | 590 | LuaContext g_lua; |
3f25bce6 | 591 |
// g_policy: global server selection policy holder (its users are not visible
// in this excerpt).
ecbe9133 | 592 | GlobalStateHolder<ServerPolicy> g_policy; |
773470ca | 593 | |
497a6e3a | 594 | shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq) |
773470ca | 595 | { |
22b2b326 | 596 | for(auto& d : servers) { |
da4e7813 | 597 | if(d.second->isUp() && d.second->qps.check()) |
598 | return d.second; | |
773470ca | 599 | } |
497a6e3a | 600 | return leastOutstanding(servers, dq); |
2c5db3f7 | 601 | } |
602 | ||
d1fba58e | 603 | // get server with least outstanding queries, and within those, with the lowest order, and within those: the fastest |
497a6e3a | 604 | shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq) |
c9262563 | 605 | { |
886e2cf2 RG |
606 | if (servers.size() == 1 && servers[0].second->isUp()) { |
607 | return servers[0].second; | |
608 | } | |
609 | ||
d1fba58e | 610 | vector<pair<tuple<int,int,double>, shared_ptr<DownstreamState>>> poss; |
611 | /* so you might wonder, why do we go through this trouble? The data on which we sort could change during the sort, | |
612 | which would suck royally and could even lead to crashes. So first we snapshot on what we sort, and then we sort */ | |
478ce172 | 613 | poss.reserve(servers.size()); |
0e41337b | 614 | for(auto& d : servers) { |
da4e7813 | 615 | if(d.second->isUp()) { |
d1fba58e | 616 | poss.push_back({make_tuple(d.second->outstanding.load(), d.second->order, d.second->latencyUsec), d.second}); |
c9262563 | 617 | } |
618 | } | |
619 | if(poss.empty()) | |
620 | return shared_ptr<DownstreamState>(); | |
621 | nth_element(poss.begin(), poss.begin(), poss.end(), [](const decltype(poss)::value_type& a, const decltype(poss)::value_type& b) { return a.first < b.first; }); | |
622 | return poss.begin()->second; | |
623 | } | |
624 | ||
497a6e3a | 625 | shared_ptr<DownstreamState> valrandom(unsigned int val, const NumberedServerVector& servers, const DNSQuestion* dq) |
c9262563 | 626 | { |
627 | vector<pair<int, shared_ptr<DownstreamState>>> poss; | |
628 | int sum=0; | |
22b2b326 | 629 | for(auto& d : servers) { // w=1, w=10 -> 1, 11 |
da4e7813 | 630 | if(d.second->isUp()) { |
631 | sum+=d.second->weight; | |
632 | poss.push_back({sum, d.second}); | |
c9262563 | 633 | } |
634 | } | |
d2894425 NJ |
635 | |
636 | // Catch poss & sum are empty to avoid SIGFPE | |
637 | if(poss.empty()) | |
638 | return shared_ptr<DownstreamState>(); | |
639 | ||
a7f3108c | 640 | int r = val % sum; |
af619119 | 641 | auto p = upper_bound(poss.begin(), poss.end(),r, [](int r_, const decltype(poss)::value_type& a) { return r_ < a.first;}); |
c9262563 | 642 | if(p==poss.end()) |
643 | return shared_ptr<DownstreamState>(); | |
644 | return p->second; | |
645 | } | |
646 | ||
497a6e3a | 647 | shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq) |
a7f3108c | 648 | { |
497a6e3a | 649 | return valrandom(random(), servers, dq); |
a7f3108c | 650 | } |
651 | ||
36e763fa | 652 | uint32_t g_hashperturb; |
497a6e3a | 653 | shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq) |
a7f3108c | 654 | { |
497a6e3a | 655 | return valrandom(dq->qname->hash(g_hashperturb), servers, dq); |
a7f3108c | 656 | } |
657 | ||
658 | ||
497a6e3a | 659 | shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq) |
5f504638 | 660 | { |
da4e7813 | 661 | NumberedServerVector poss; |
c9262563 | 662 | |
22b2b326 | 663 | for(auto& d : servers) { |
da4e7813 | 664 | if(d.second->isUp()) { |
5f504638 | 665 | poss.push_back(d); |
c9262563 | 666 | } |
5f504638 | 667 | } |
c9262563 | 668 | |
ecbe9133 | 669 | const auto *res=&poss; |
5f504638 | 670 | if(poss.empty()) |
ecbe9133 | 671 | res = &servers; |
c9262563 | 672 | |
673 | if(res->empty()) | |
674 | return shared_ptr<DownstreamState>(); | |
675 | ||
676 | static unsigned int counter; | |
677 | ||
da4e7813 | 678 | return (*res)[(counter++) % res->size()].second; |
5f504638 | 679 | } |
680 | ||
c7b1d943 PL |
681 | static void writepid(string pidfile) { |
682 | if (!pidfile.empty()) { | |
683 | // Clean up possible stale file | |
684 | unlink(pidfile.c_str()); | |
685 | ||
686 | // Write the pidfile | |
687 | ofstream of(pidfile.c_str()); | |
688 | if (of) { | |
689 | of << getpid(); | |
690 | } else { | |
691 | errlog("Unable to write PID-file to '%s'.", pidfile); | |
692 | } | |
693 | of.close(); | |
694 | } | |
695 | } | |
696 | ||
b076b34a | 697 | static void daemonize(void) |
698 | { | |
699 | if(fork()) | |
700 | _exit(0); // bye bye | |
c7b1d943 PL |
701 | /* We are child */ |
702 | ||
b076b34a | 703 | setsid(); |
704 | ||
705 | int i=open("/dev/null",O_RDWR); /* open stdin */ | |
706 | if(i < 0) | |
707 | ; // L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl; | |
708 | else { | |
709 | dup2(i,0); /* stdin */ | |
710 | dup2(i,1); /* stderr */ | |
711 | dup2(i,2); /* stderr */ | |
712 | close(i); | |
713 | } | |
714 | } | |
715 | ||
// Defaults to 127.0.0.1, port 5199; presumably the address of the control
// (console) socket -- TODO confirm against the code using it.
e6841c9e | 716 | ComboAddress g_serverControl{"127.0.0.1:5199"}; |
b076b34a | 717 | |
886e2cf2 RG |
718 | std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName) |
719 | { | |
720 | std::shared_ptr<ServerPool> pool; | |
721 | pools_t::iterator it = pools.find(poolName); | |
722 | if (it != pools.end()) { | |
723 | pool = it->second; | |
724 | } | |
725 | else { | |
8f4f5186 RG |
726 | if (!poolName.empty()) |
727 | vinfolog("Creating pool %s", poolName); | |
886e2cf2 RG |
728 | pool = std::make_shared<ServerPool>(); |
729 | pools.insert(std::pair<std::string,std::shared_ptr<ServerPool> >(poolName, pool)); | |
730 | } | |
731 | return pool; | |
732 | } | |
78c8047b | 733 | |
742c079a RG |
734 | void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy) |
735 | { | |
736 | std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName); | |
737 | if (!poolName.empty()) { | |
738 | vinfolog("Setting pool %s server selection policy to %s", poolName, policy->name); | |
739 | } else { | |
740 | vinfolog("Setting default pool server selection policy to %s", policy->name); | |
741 | } | |
742 | pool->policy = policy; | |
743 | } | |
744 | ||
886e2cf2 | 745 | void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server) |
22b2b326 | 746 | { |
886e2cf2 | 747 | std::shared_ptr<ServerPool> pool = createPoolIfNotExists(pools, poolName); |
a683e8bd | 748 | unsigned int count = (unsigned int) pool->servers.size(); |
c218b223 | 749 | if (!poolName.empty()) { |
8f4f5186 | 750 | vinfolog("Adding server to pool %s", poolName); |
c218b223 | 751 | } else { |
8f4f5186 | 752 | vinfolog("Adding server to default pool"); |
c218b223 | 753 | } |
886e2cf2 | 754 | pool->servers.push_back(make_pair(++count, server)); |
d12cd8e9 RG |
755 | /* we need to reorder based on the server 'order' */ |
756 | std::stable_sort(pool->servers.begin(), pool->servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) { | |
757 | return a.second->order < b.second->order; | |
758 | }); | |
759 | /* and now we need to renumber for Lua (custom policies) */ | |
760 | size_t idx = 1; | |
af619119 RG |
761 | for (auto& serv : pool->servers) { |
762 | serv.first = idx++; | |
d12cd8e9 | 763 | } |
886e2cf2 RG |
764 | } |
765 | ||
766 | void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server) | |
767 | { | |
8f4f5186 | 768 | std::shared_ptr<ServerPool> pool = getPool(pools, poolName); |
886e2cf2 | 769 | |
c218b223 | 770 | if (!poolName.empty()) { |
8f4f5186 | 771 | vinfolog("Removing server from pool %s", poolName); |
c218b223 | 772 | } |
773 | else { | |
8f4f5186 | 774 | vinfolog("Removing server from default pool"); |
c218b223 | 775 | } |
886e2cf2 | 776 | |
d12cd8e9 RG |
777 | size_t idx = 1; |
778 | bool found = false; | |
779 | for (NumberedVector<shared_ptr<DownstreamState> >::iterator it = pool->servers.begin(); it != pool->servers.end();) { | |
780 | if (found) { | |
781 | /* we need to renumber the servers placed | |
782 | after the removed one, for Lua (custom policies) */ | |
783 | it->first = idx++; | |
784 | it++; | |
785 | } | |
786 | else if (it->second == server) { | |
787 | it = pool->servers.erase(it); | |
788 | found = true; | |
789 | } else { | |
790 | idx++; | |
791 | it++; | |
886e2cf2 RG |
792 | } |
793 | } | |
794 | } | |
795 | ||
796 | std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName) | |
797 | { | |
798 | pools_t::const_iterator it = pools.find(poolName); | |
799 | ||
800 | if (it == pools.end()) { | |
801 | throw std::out_of_range("No pool named " + poolName); | |
802 | } | |
803 | ||
804 | return it->second; | |
805 | } | |
806 | ||
807 | const NumberedServerVector& getDownstreamCandidates(const pools_t& pools, const std::string& poolName) | |
808 | { | |
809 | std::shared_ptr<ServerPool> pool = getPool(pools, poolName); | |
810 | return pool->servers; | |
22b2b326 | 811 | } |
c9262563 | 812 | |
520eb5a0 | 813 | // goal in life - if you send us a reasonably normal packet, we'll get Z for you, otherwise 0 |
814 | int getEDNSZ(const char* packet, unsigned int len) | |
d08b1cdf | 815 | try |
520eb5a0 | 816 | { |
817 | struct dnsheader* dh =(struct dnsheader*)packet; | |
818 | ||
ca404e94 | 819 | if(ntohs(dh->qdcount) != 1 || dh->ancount!=0 || ntohs(dh->arcount)!=1 || dh->nscount!=0) |
520eb5a0 | 820 | return 0; |
ca404e94 RG |
821 | |
822 | if (len <= sizeof(dnsheader)) | |
823 | return 0; | |
824 | ||
520eb5a0 | 825 | unsigned int consumed; |
ca404e94 RG |
826 | DNSName qname(packet, len, sizeof(dnsheader), false, 0, 0, &consumed); |
827 | size_t pos = consumed + DNS_TYPE_SIZE + DNS_CLASS_SIZE; | |
520eb5a0 | 828 | uint16_t qtype, qclass; |
829 | ||
ca404e94 | 830 | if (len <= (sizeof(dnsheader)+pos)) |
520eb5a0 | 831 | return 0; |
832 | ||
ca404e94 RG |
833 | DNSName aname(packet, len, sizeof(dnsheader)+pos, true, &qtype, &qclass, &consumed); |
834 | ||
835 | if(qtype!=QType::OPT || sizeof(dnsheader)+pos+consumed+DNS_TYPE_SIZE+DNS_CLASS_SIZE+EDNS_EXTENDED_RCODE_SIZE+EDNS_VERSION_SIZE+1 >= len) | |
836 | return 0; | |
837 | ||
838 | uint8_t* z = (uint8_t*)packet+sizeof(dnsheader)+pos+consumed+DNS_TYPE_SIZE+DNS_CLASS_SIZE+EDNS_EXTENDED_RCODE_SIZE+EDNS_VERSION_SIZE; | |
520eb5a0 | 839 | return 0x100 * (*z) + *(z+1); |
840 | } | |
d08b1cdf | 841 | catch(...) |
842 | { | |
843 | return 0; | |
844 | } | |
0940e4eb | 845 | |
614239d0 | 846 | static void spoofResponseFromString(DNSQuestion& dq, const string& spoofContent) |
7791f83a RG |
847 | { |
848 | string result; | |
614239d0 RG |
849 | |
850 | std::vector<std::string> addrs; | |
851 | stringtok(addrs, spoofContent, " ,"); | |
852 | ||
853 | if (addrs.size() == 1) { | |
854 | try { | |
855 | ComboAddress spoofAddr(spoofContent); | |
856 | SpoofAction sa({spoofAddr}); | |
857 | sa(&dq, &result); | |
858 | } | |
859 | catch(const PDNSException &e) { | |
860 | SpoofAction sa(spoofContent); // CNAME then | |
861 | sa(&dq, &result); | |
862 | } | |
863 | } else { | |
864 | std::vector<ComboAddress> cas; | |
865 | for (const auto& addr : addrs) { | |
866 | try { | |
867 | cas.push_back(ComboAddress(addr)); | |
868 | } | |
869 | catch (...) { | |
870 | } | |
871 | } | |
872 | SpoofAction sa(cas); | |
6ca7a40a | 873 | sa(&dq, &result); |
7791f83a RG |
874 | } |
875 | } | |
876 | ||
0beaa5c8 | 877 | bool processQuery(LocalHolders& holders, DNSQuestion& dq, string& poolname, int* delayMsec, const struct timespec& now) |
e91084ce RG |
878 | { |
879 | { | |
880 | WriteLock wl(&g_rings.queryLock); | |
8146444b | 881 | g_rings.queryRing.push_back({now,*dq.remote,*dq.qname,dq.len,dq.qtype,*dq.dh}); |
e91084ce RG |
882 | } |
883 | ||
786e4d8c RS |
884 | if(g_qcount.enabled) { |
885 | string qname = (*dq.qname).toString("."); | |
886 | bool countQuery{true}; | |
887 | if(g_qcount.filter) { | |
888 | std::lock_guard<std::mutex> lock(g_luamutex); | |
889 | std::tie (countQuery, qname) = g_qcount.filter(dq); | |
890 | } | |
891 | ||
892 | if(countQuery) { | |
893 | WriteLock wl(&g_qcount.queryLock); | |
894 | if(!g_qcount.records.count(qname)) { | |
895 | g_qcount.records[qname] = 0; | |
896 | } | |
897 | g_qcount.records[qname]++; | |
898 | } | |
899 | } | |
900 | ||
0beaa5c8 | 901 | if(auto got = holders.dynNMGBlock->lookup(*dq.remote)) { |
701f690b | 902 | auto updateBlockStats = [&got]() { |
903 | g_stats.dynBlocked++; | |
904 | got->second.blocks++; | |
905 | }; | |
906 | ||
e91084ce | 907 | if(now < got->second.until) { |
7b925432 RG |
908 | DNSAction::Action action = got->second.action; |
909 | if (action == DNSAction::Action::None) { | |
910 | action = g_dynBlockAction; | |
911 | } | |
912 | if (action == DNSAction::Action::Refused) { | |
dd46e5e3 | 913 | vinfolog("Query from %s refused because of dynamic block", dq.remote->toStringWithPort()); |
701f690b | 914 | updateBlockStats(); |
8477236d | 915 | |
dd46e5e3 RG |
916 | dq.dh->rcode = RCode::Refused; |
917 | dq.dh->qr=true; | |
918 | return true; | |
919 | } | |
8477236d | 920 | else if (action == DNSAction::Action::Truncate) { |
921 | if(!dq.tcp) { | |
701f690b | 922 | updateBlockStats(); |
8477236d | 923 | vinfolog("Query from %s truncated because of dynamic block", dq.remote->toStringWithPort()); |
924 | dq.dh->tc = true; | |
925 | dq.dh->qr = true; | |
926 | return true; | |
927 | } | |
928 | else { | |
929 | vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString()); | |
930 | } | |
931 | ||
c4f5aeff | 932 | } |
dd46e5e3 | 933 | else { |
701f690b | 934 | updateBlockStats(); |
dd46e5e3 RG |
935 | vinfolog("Query from %s dropped because of dynamic block", dq.remote->toStringWithPort()); |
936 | return false; | |
937 | } | |
e91084ce RG |
938 | } |
939 | } | |
940 | ||
0beaa5c8 | 941 | if(auto got = holders.dynSMTBlock->lookup(*dq.qname)) { |
701f690b | 942 | auto updateBlockStats = [&got]() { |
943 | g_stats.dynBlocked++; | |
944 | got->blocks++; | |
945 | }; | |
946 | ||
71c94675 | 947 | if(now < got->until) { |
7b925432 RG |
948 | DNSAction::Action action = got->action; |
949 | if (action == DNSAction::Action::None) { | |
950 | action = g_dynBlockAction; | |
951 | } | |
952 | if (action == DNSAction::Action::Refused) { | |
dd46e5e3 | 953 | vinfolog("Query from %s for %s refused because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString()); |
701f690b | 954 | updateBlockStats(); |
8477236d | 955 | |
dd46e5e3 RG |
956 | dq.dh->rcode = RCode::Refused; |
957 | dq.dh->qr=true; | |
958 | return true; | |
959 | } | |
8477236d | 960 | else if (action == DNSAction::Action::Truncate) { |
961 | if(!dq.tcp) { | |
701f690b | 962 | updateBlockStats(); |
8477236d | 963 | |
964 | vinfolog("Query from %s for %s truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString()); | |
965 | dq.dh->tc = true; | |
966 | dq.dh->qr = true; | |
967 | return true; | |
968 | } | |
969 | else { | |
970 | vinfolog("Query from %s for %s over TCP *not* truncated because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString()); | |
971 | } | |
c4f5aeff | 972 | } |
dd46e5e3 | 973 | else { |
701f690b | 974 | updateBlockStats(); |
dd46e5e3 RG |
975 | vinfolog("Query from %s for %s dropped because of dynamic block", dq.remote->toStringWithPort(), dq.qname->toString()); |
976 | return false; | |
977 | } | |
71c94675 | 978 | } |
979 | } | |
980 | ||
e91084ce RG |
981 | DNSAction::Action action=DNSAction::Action::None; |
982 | string ruleresult; | |
0beaa5c8 | 983 | for(const auto& lr : *holders.rulactions) { |
e91084ce RG |
984 | if(lr.first->matches(&dq)) { |
985 | lr.first->d_matches++; | |
986 | action=(*lr.second)(&dq, &ruleresult); | |
987 | ||
988 | switch(action) { | |
989 | case DNSAction::Action::Allow: | |
990 | return true; | |
991 | break; | |
992 | case DNSAction::Action::Drop: | |
993 | g_stats.ruleDrop++; | |
994 | return false; | |
995 | break; | |
996 | case DNSAction::Action::Nxdomain: | |
997 | dq.dh->rcode = RCode::NXDomain; | |
998 | dq.dh->qr=true; | |
999 | g_stats.ruleNXDomain++; | |
1000 | return true; | |
1001 | break; | |
dd46e5e3 RG |
1002 | case DNSAction::Action::Refused: |
1003 | dq.dh->rcode = RCode::Refused; | |
1004 | dq.dh->qr=true; | |
1005 | g_stats.ruleRefused++; | |
1006 | return true; | |
1007 | break; | |
e91084ce RG |
1008 | case DNSAction::Action::Spoof: |
1009 | spoofResponseFromString(dq, ruleresult); | |
1010 | return true; | |
1011 | break; | |
c4f5aeff RG |
1012 | case DNSAction::Action::Truncate: |
1013 | dq.dh->tc = true; | |
1014 | dq.dh->qr = true; | |
1015 | return true; | |
1016 | break; | |
e91084ce RG |
1017 | case DNSAction::Action::HeaderModify: |
1018 | return true; | |
1019 | break; | |
1020 | case DNSAction::Action::Pool: | |
1021 | poolname=ruleresult; | |
1022 | return true; | |
1023 | break; | |
1024 | /* non-terminal actions follow */ | |
1025 | case DNSAction::Action::Delay: | |
1026 | *delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry | |
1027 | break; | |
1028 | case DNSAction::Action::None: | |
1029 | break; | |
1030 | } | |
1031 | } | |
1032 | } | |
1033 | ||
1034 | return true; | |
1035 | } | |
1036 | ||
788c3243 | 1037 | bool processResponse(LocalStateHolder<vector<pair<std::shared_ptr<DNSRule>, std::shared_ptr<DNSResponseAction> > > >& localRespRulactions, DNSResponse& dr, int* delayMsec) |
8146444b | 1038 | { |
788c3243 | 1039 | DNSResponseAction::Action action=DNSResponseAction::Action::None; |
8146444b RG |
1040 | std::string ruleresult; |
1041 | for(const auto& lr : *localRespRulactions) { | |
1042 | if(lr.first->matches(&dr)) { | |
1043 | lr.first->d_matches++; | |
788c3243 RG |
1044 | action=(*lr.second)(&dr, &ruleresult); |
1045 | switch(action) { | |
1046 | case DNSResponseAction::Action::Allow: | |
1047 | return true; | |
1048 | break; | |
1049 | case DNSResponseAction::Action::Drop: | |
1050 | return false; | |
1051 | break; | |
1052 | case DNSResponseAction::Action::HeaderModify: | |
1053 | return true; | |
1054 | break; | |
1055 | /* non-terminal actions follow */ | |
1056 | case DNSResponseAction::Action::Delay: | |
1057 | *delayMsec = static_cast<int>(pdns_stou(ruleresult)); // sorry | |
1058 | break; | |
1059 | case DNSResponseAction::Action::None: | |
1060 | break; | |
1061 | } | |
8146444b RG |
1062 | } |
1063 | } | |
1064 | ||
1065 | return true; | |
1066 | } | |
1067 | ||
fbe2a2e0 RG |
1068 | static ssize_t udpClientSendRequestToBackend(DownstreamState* ss, const int sd, const char* request, const size_t requestLen) |
1069 | { | |
b58f08e5 RG |
1070 | ssize_t result; |
1071 | ||
fbe2a2e0 | 1072 | if (ss->sourceItf == 0) { |
b58f08e5 RG |
1073 | result = send(sd, request, requestLen, 0); |
1074 | } | |
1075 | else { | |
1076 | struct msghdr msgh; | |
1077 | struct iovec iov; | |
1078 | char cbuf[256]; | |
1079 | fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), const_cast<char*>(request), requestLen, &ss->remote); | |
1080 | addCMsgSrcAddr(&msgh, cbuf, &ss->sourceAddr, ss->sourceItf); | |
1081 | result = sendmsg(sd, &msgh, 0); | |
fbe2a2e0 RG |
1082 | } |
1083 | ||
b58f08e5 RG |
1084 | if (result == -1) { |
1085 | int savederrno = errno; | |
1086 | vinfolog("Error sending request to backend %s: %d", ss->remote.toStringWithPort(), savederrno); | |
1087 | ||
1088 | /* This might sound silly, but on Linux send() might fail with EINVAL | |
1089 | if the interface the socket was bound to doesn't exist anymore. */ | |
1090 | if (savederrno == EINVAL) { | |
1091 | ss->reconnect(); | |
1092 | } | |
fbe2a2e0 RG |
1093 | } |
1094 | ||
b58f08e5 | 1095 | return result; |
fbe2a2e0 RG |
1096 | } |
1097 | ||
0beaa5c8 | 1098 | static bool isUDPQueryAcceptable(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest) |
24d5cb00 | 1099 | { |
0beaa5c8 RG |
1100 | if (msgh->msg_flags & MSG_TRUNC) { |
1101 | /* message was too large for our buffer */ | |
1102 | vinfolog("Dropping message too large for our buffer"); | |
1103 | g_stats.nonCompliantQueries++; | |
1104 | return false; | |
1105 | } | |
a4652d55 | 1106 | |
0beaa5c8 RG |
1107 | if(!holders.acl->match(remote)) { |
1108 | vinfolog("Query from %s dropped because of ACL", remote.toStringWithPort()); | |
1109 | g_stats.aclDrops++; | |
1110 | return false; | |
2b3eefc3 | 1111 | } |
2b3eefc3 | 1112 | |
0beaa5c8 RG |
1113 | cs.queries++; |
1114 | g_stats.queries++; | |
2b3eefc3 | 1115 | |
0beaa5c8 RG |
1116 | if (HarvestDestinationAddress(msgh, &dest)) { |
1117 | /* we don't get the port, only the address */ | |
1118 | dest.sin4.sin_port = cs.local.sin4.sin_port; | |
1119 | } | |
1120 | else { | |
1121 | dest.sin4.sin_family = 0; | |
2b3eefc3 | 1122 | } |
549d63c9 | 1123 | |
0beaa5c8 RG |
1124 | return true; |
1125 | } | |
1126 | ||
#ifdef HAVE_DNSCRYPT
/* Handles the DNSCrypt layer of a query received on `cs`, if the socket has
   a DNSCrypt context at all (otherwise nothing is done and true is returned).

   On success `len` is updated to the length reported for the decrypted
   query and true is returned. Otherwise false is returned, after sending
   to `remote` the response produced by the DNSCrypt handling, if any. */
static bool checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DnsCryptQuery>& dnsCryptQuery, const ComboAddress& dest, const ComboAddress& remote)
{
  if (!cs.dnscryptCtx) {
    return true;
  }

  vector<uint8_t> response;
  uint16_t decryptedQueryLen = 0;

  dnsCryptQuery = std::make_shared<DnsCryptQuery>();

  const bool decrypted = handleDnsCryptQuery(cs.dnscryptCtx, const_cast<char*>(query), len, dnsCryptQuery, &decryptedQueryLen, false, response);

  if (decrypted) {
    len = decryptedQueryLen;
    return true;
  }

  if (!response.empty()) {
    sendUDPResponse(cs.udpFD, reinterpret_cast<char*>(response.data()), static_cast<uint16_t>(response.size()), 0, dest, remote);
  }
  return false;
}
#endif /* HAVE_DNSCRYPT */
2b3eefc3 | 1150 | |
0beaa5c8 RG |
1151 | bool checkQueryHeaders(const struct dnsheader* dh) |
1152 | { | |
1153 | if (dh->qr) { // don't respond to responses | |
1154 | g_stats.nonCompliantQueries++; | |
1155 | return false; | |
1156 | } | |
2b3eefc3 | 1157 | |
0beaa5c8 RG |
1158 | if (dh->qdcount == 0) { |
1159 | g_stats.emptyQueries++; | |
1160 | return false; | |
1161 | } | |
0ba5eecf | 1162 | |
0beaa5c8 RG |
1163 | if (dh->rd) { |
1164 | g_stats.rdQueries++; | |
1165 | } | |
e91084ce | 1166 | |
0beaa5c8 RG |
1167 | return true; |
1168 | } | |
963bef8d | 1169 | |
0beaa5c8 RG |
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* Fills `outMsg` so that `response` (`responseLen` bytes) gets sent to
   `remote`, using `iov` as the IO vector. When `dest` is known (family
   different from 0) it is attached as the source address using `cbuf`,
   otherwise no ancillary data is set. Nothing is sent here. */
static void queueResponse(const ClientState& cs, const char* response, uint16_t responseLen, const ComboAddress& dest, const ComboAddress& remote, struct mmsghdr& outMsg, struct iovec* iov, char* cbuf)
{
  outMsg.msg_len = 0;
  fillMSGHdr(&outMsg.msg_hdr, iov, nullptr, 0, const_cast<char*>(response), responseLen, const_cast<ComboAddress*>(&remote));

  const bool destKnown = (dest.sin4.sin_family != 0);
  if (destKnown) {
    addCMsgSrcAddr(&outMsg.msg_hdr, cbuf, &dest, 0);
  }
  else {
    outMsg.msg_hdr.msg_control = nullptr;
  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
43eeadc1 | 1184 | |
0beaa5c8 RG |
1185 | static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct msghdr* msgh, const ComboAddress& remote, ComboAddress& dest, char* query, uint16_t len, size_t queryBufferSize, struct mmsghdr* responsesVect, unsigned int* queuedResponses, struct iovec* respIOV, char* respCBuf) |
1186 | { | |
1187 | assert(responsesVect == nullptr || (queuedResponses != nullptr && respIOV != nullptr && respCBuf != nullptr)); | |
1188 | uint16_t queryId = 0; | |
2b3eefc3 | 1189 | |
0beaa5c8 RG |
1190 | try { |
1191 | if (!isUDPQueryAcceptable(cs, holders, msgh, remote, dest)) { | |
1192 | return; | |
1193 | } | |
7cea4e39 | 1194 | |
11e1e08b | 1195 | #ifdef HAVE_DNSCRYPT |
0beaa5c8 | 1196 | std::shared_ptr<DnsCryptQuery> dnsCryptQuery = nullptr; |
11e1e08b | 1197 | |
0beaa5c8 RG |
1198 | if (!checkDNSCryptQuery(cs, query, len, dnsCryptQuery, dest, remote)) { |
1199 | return; | |
1200 | } | |
11e1e08b RG |
1201 | #endif |
1202 | ||
0beaa5c8 RG |
1203 | struct dnsheader* dh = reinterpret_cast<struct dnsheader*>(query); |
1204 | queryId = ntohs(dh->id); | |
11e1e08b | 1205 | |
0beaa5c8 RG |
1206 | if (!checkQueryHeaders(dh)) { |
1207 | return; | |
1208 | } | |
2efd427d | 1209 | |
0beaa5c8 RG |
1210 | const uint16_t * flags = getFlagsFromDNSHeader(dh); |
1211 | const uint16_t origFlags = *flags; | |
1212 | uint16_t qtype, qclass; | |
1213 | unsigned int consumed = 0; | |
1214 | DNSName qname(query, len, sizeof(dnsheader), false, &qtype, &qclass, &consumed); | |
1215 | DNSQuestion dq(&qname, qtype, qclass, dest.sin4.sin_family != 0 ? &dest : &cs.local, &remote, dh, queryBufferSize, len, false); | |
11e1e08b | 1216 | |
0beaa5c8 RG |
1217 | string poolname; |
1218 | int delayMsec = 0; | |
1219 | /* we need an accurate ("real") value for the response and | |
1220 | to store into the IDS, but not for insertion into the | |
1221 | rings for example */ | |
1222 | struct timespec realTime; | |
1223 | struct timespec now; | |
1224 | gettime(&now); | |
1225 | gettime(&realTime, true); | |
1226 | ||
1227 | if (!processQuery(holders, dq, poolname, &delayMsec, now)) | |
1228 | { | |
1229 | return; | |
1230 | } | |
b63add03 | 1231 | |
0beaa5c8 RG |
1232 | if(dq.dh->qr) { // something turned it into a response |
1233 | g_stats.selfAnswered++; | |
1234 | restoreFlags(dh, origFlags); | |
1235 | ||
1236 | if (!cs.muted) { | |
11e1e08b | 1237 | char* response = query; |
497a6e3a | 1238 | uint16_t responseLen = dq.len; |
11e1e08b | 1239 | |
fcffc585 | 1240 | #ifdef HAVE_DNSCRYPT |
0beaa5c8 RG |
1241 | if (!encryptResponse(response, &responseLen, dq.size, false, dnsCryptQuery, nullptr, nullptr)) { |
1242 | return; | |
1243 | } | |
11e1e08b | 1244 | #endif |
0beaa5c8 RG |
1245 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
1246 | if (delayMsec == 0 && responsesVect != nullptr) { | |
1247 | queueResponse(cs, response, responseLen, dest, remote, responsesVect[*queuedResponses], respIOV, respCBuf); | |
1248 | (*queuedResponses)++; | |
1249 | } | |
1250 | else | |
1251 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
1252 | { | |
1253 | sendUDPResponse(cs.udpFD, response, responseLen, delayMsec, dest, remote); | |
b5b93e0b | 1254 | } |
22b2b326 | 1255 | } |
5f504638 | 1256 | |
0beaa5c8 RG |
1257 | return; |
1258 | } | |
1259 | ||
1260 | DownstreamState* ss = nullptr; | |
1261 | std::shared_ptr<ServerPool> serverPool = getPool(*holders.pools, poolname); | |
1262 | std::shared_ptr<DNSDistPacketCache> packetCache = nullptr; | |
1263 | auto policy = holders.policy->policy; | |
1264 | if (serverPool->policy != nullptr) { | |
1265 | policy = serverPool->policy->policy; | |
1266 | } | |
1267 | { | |
1268 | std::lock_guard<std::mutex> lock(g_luamutex); | |
1269 | ss = policy(serverPool->servers, &dq).get(); | |
1270 | packetCache = serverPool->packetCache; | |
1271 | } | |
228e4fe8 | 1272 | |
0beaa5c8 RG |
1273 | bool ednsAdded = false; |
1274 | bool ecsAdded = false; | |
1275 | if (dq.useECS && ss && ss->useECS) { | |
1276 | if (!handleEDNSClientSubnet(query, dq.size, consumed, &dq.len, &(ednsAdded), &(ecsAdded), remote, dq.ecsOverride, dq.ecsPrefixLength)) { | |
126dcf66 | 1277 | vinfolog("Dropping query from %s because we couldn't insert the ECS value", remote.toStringWithPort()); |
0beaa5c8 | 1278 | return; |
886e2cf2 | 1279 | } |
0beaa5c8 | 1280 | } |
886e2cf2 | 1281 | |
0beaa5c8 RG |
1282 | uint32_t cacheKey = 0; |
1283 | if (packetCache && !dq.skipCache) { | |
1284 | uint16_t cachedResponseSize = dq.size; | |
1285 | uint32_t allowExpired = ss ? 0 : g_staleCacheEntriesTTL; | |
1286 | if (packetCache->get(dq, consumed, dh->id, query, &cachedResponseSize, &cacheKey, allowExpired)) { | |
1287 | DNSResponse dr(dq.qname, dq.qtype, dq.qclass, dq.local, dq.remote, reinterpret_cast<dnsheader*>(query), dq.size, cachedResponseSize, false, &realTime); | |
cf48b0ce | 1288 | #ifdef HAVE_PROTOBUF |
0beaa5c8 | 1289 | dr.uniqueId = dq.uniqueId; |
cf48b0ce | 1290 | #endif |
a76b0d63 RG |
1291 | dr.qTag = dq.qTag; |
1292 | ||
0beaa5c8 RG |
1293 | if (!processResponse(holders.cacheHitRespRulactions, dr, &delayMsec)) { |
1294 | return; | |
1295 | } | |
cf48b0ce | 1296 | |
0beaa5c8 | 1297 | if (!cs.muted) { |
fcffc585 | 1298 | #ifdef HAVE_DNSCRYPT |
0beaa5c8 RG |
1299 | if (!encryptResponse(query, &cachedResponseSize, dq.size, false, dnsCryptQuery, nullptr, nullptr)) { |
1300 | return; | |
1301 | } | |
fcffc585 | 1302 | #endif |
0beaa5c8 RG |
1303 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
1304 | if (delayMsec == 0 && responsesVect != nullptr) { | |
1305 | queueResponse(cs, query, cachedResponseSize, dest, remote, responsesVect[*queuedResponses], respIOV, respCBuf); | |
1306 | (*queuedResponses)++; | |
1307 | } | |
1308 | else | |
1309 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
1310 | { | |
1311 | sendUDPResponse(cs.udpFD, query, cachedResponseSize, delayMsec, dest, remote); | |
b5b93e0b | 1312 | } |
886e2cf2 | 1313 | } |
0beaa5c8 RG |
1314 | |
1315 | g_stats.cacheHits++; | |
1316 | g_stats.latency0_1++; // we're not going to measure this | |
1317 | doLatencyAverages(0); // same | |
1318 | return; | |
886e2cf2 | 1319 | } |
0beaa5c8 RG |
1320 | g_stats.cacheMisses++; |
1321 | } | |
886e2cf2 | 1322 | |
0beaa5c8 RG |
1323 | if(!ss) { |
1324 | g_stats.noPolicy++; | |
26a3cdb7 | 1325 | |
0beaa5c8 RG |
1326 | if (g_servFailOnNoPolicy && !cs.muted) { |
1327 | char* response = query; | |
1328 | uint16_t responseLen = dq.len; | |
1329 | restoreFlags(dh, origFlags); | |
26a3cdb7 | 1330 | |
0beaa5c8 RG |
1331 | dq.dh->rcode = RCode::ServFail; |
1332 | dq.dh->qr = true; | |
26a3cdb7 RG |
1333 | |
1334 | #ifdef HAVE_DNSCRYPT | |
0beaa5c8 RG |
1335 | if (!encryptResponse(response, &responseLen, dq.size, false, dnsCryptQuery, nullptr, nullptr)) { |
1336 | return; | |
1337 | } | |
26a3cdb7 | 1338 | #endif |
0beaa5c8 RG |
1339 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
1340 | if (responsesVect != nullptr) { | |
1341 | queueResponse(cs, response, responseLen, dest, remote, responsesVect[*queuedResponses], respIOV, respCBuf); | |
1342 | (*queuedResponses)++; | |
1343 | } | |
1344 | else | |
1345 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
1346 | { | |
1347 | sendUDPResponse(cs.udpFD, response, responseLen, 0, dest, remote); | |
26a3cdb7 | 1348 | } |
1ea747c0 | 1349 | } |
b9c3c5df | 1350 | vinfolog("%s query for %s|%s from %s, no policy applied", g_servFailOnNoPolicy ? "ServFailed" : "Dropped", dq.qname->toString(), QType(dq.qtype).getName(), remote.toStringWithPort()); |
0beaa5c8 RG |
1351 | return; |
1352 | } | |
1ea747c0 | 1353 | |
0beaa5c8 | 1354 | ss->queries++; |
11e1e08b | 1355 | |
0beaa5c8 RG |
1356 | unsigned int idOffset = (ss->idOffset++) % ss->idStates.size(); |
1357 | IDState* ids = &ss->idStates[idOffset]; | |
1358 | ids->age = 0; | |
c9ba8478 | 1359 | |
0beaa5c8 RG |
1360 | if(ids->origFD < 0) // if we are reusing, no change in outstanding |
1361 | ss->outstanding++; | |
1362 | else { | |
1363 | ss->reuseds++; | |
1364 | g_stats.downstreamTimeouts++; | |
1365 | } | |
0e41337b | 1366 | |
0beaa5c8 RG |
1367 | ids->cs = &cs; |
1368 | ids->origFD = cs.udpFD; | |
1369 | ids->origID = dh->id; | |
1370 | ids->origRemote = remote; | |
1371 | ids->sentTime.set(realTime); | |
1372 | ids->qname = qname; | |
1373 | ids->qtype = dq.qtype; | |
1374 | ids->qclass = dq.qclass; | |
1375 | ids->delayMsec = delayMsec; | |
1376 | ids->origFlags = origFlags; | |
1377 | ids->cacheKey = cacheKey; | |
1378 | ids->skipCache = dq.skipCache; | |
1379 | ids->packetCache = packetCache; | |
1380 | ids->ednsAdded = ednsAdded; | |
1381 | ids->ecsAdded = ecsAdded; | |
a76b0d63 | 1382 | ids->qTag = dq.qTag; |
0beaa5c8 RG |
1383 | |
1384 | /* If we couldn't harvest the real dest addr, still | |
1385 | write down the listening addr since it will be useful | |
1386 | (especially if it's not an 'any' one). | |
1387 | We need to keep track of which one it is since we may | |
1388 | want to use the real but not the listening addr to reply. | |
1389 | */ | |
1390 | if (dest.sin4.sin_family != 0) { | |
1391 | ids->origDest = dest; | |
1392 | ids->destHarvested = true; | |
1393 | } | |
1394 | else { | |
1395 | ids->origDest = cs.local; | |
1396 | ids->destHarvested = false; | |
1397 | } | |
11e1e08b | 1398 | #ifdef HAVE_DNSCRYPT |
0beaa5c8 | 1399 | ids->dnsCryptQuery = dnsCryptQuery; |
d8c19b98 RG |
1400 | #endif |
1401 | #ifdef HAVE_PROTOBUF | |
0beaa5c8 | 1402 | ids->uniqueId = dq.uniqueId; |
11e1e08b | 1403 | #endif |
c9ba8478 | 1404 | |
0beaa5c8 | 1405 | dh->id = idOffset; |
ca404e94 | 1406 | |
0beaa5c8 | 1407 | ssize_t ret = udpClientSendRequestToBackend(ss, ss->fd, query, dq.len); |
11e1e08b | 1408 | |
0beaa5c8 RG |
1409 | if(ret < 0) { |
1410 | ss->sendErrors++; | |
1411 | g_stats.downstreamSendErrors++; | |
1412 | } | |
ca404e94 | 1413 | |
0beaa5c8 RG |
1414 | vinfolog("Got query for %s|%s from %s, relayed to %s", ids->qname.toString(), QType(ids->qtype).getName(), remote.toStringWithPort(), ss->getName()); |
1415 | } | |
1416 | catch(const std::exception& e){ | |
1417 | vinfolog("Got an error in UDP question thread while parsing a query from %s, id %d: %s", remote.toStringWithPort(), queryId, e.what()); | |
1418 | } | |
1419 | } | |
1420 | ||
#if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE)
/* Never-ending receive loop for `cs`, getting up to g_udpVectorSize
   datagrams per recvmmsg() call. Each one is handed to processUDPQuery(),
   and the responses it queued are then sent with a single sendmmsg() call. */
static void MultipleMessagesUDPClientThread(ClientState* cs, LocalHolders& holders)
{
  struct MMReceiver
  {
    char packet[4096];
    /* used by HarvestDestinationAddress */
    char cbuf[256];
    ComboAddress remote;
    ComboAddress dest;
    struct iovec iov;
  };
  const size_t vectSize = g_udpVectorSize;
  /* the actual buffer is larger because:
     - we may have to add EDNS and/or ECS
     - we use it for self-generated responses (from rule or cache)
     but we only accept incoming payloads up to that size
  */
  static_assert(s_udpIncomingBufferSize <= sizeof(MMReceiver::packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)");

  auto recvData = std::unique_ptr<MMReceiver[]>(new MMReceiver[vectSize]);
  auto msgVec = std::unique_ptr<struct mmsghdr[]>(new struct mmsghdr[vectSize]);
  auto outMsgVec = std::unique_ptr<struct mmsghdr[]>(new struct mmsghdr[vectSize]);

  /* initialize the structures needed to receive our messages */
  for (size_t idx = 0; idx < vectSize; idx++) {
    recvData[idx].remote.sin4.sin_family = cs->local.sin4.sin_family;
    fillMSGHdr(&msgVec[idx].msg_hdr, &recvData[idx].iov, recvData[idx].cbuf, sizeof(recvData[idx].cbuf), recvData[idx].packet, s_udpIncomingBufferSize, &recvData[idx].remote);
  }

  /* go now */
  for(;;) {

    /* reset the IO vector, since it's also used to send the vector of responses
       to avoid having to copy the data around.
       The length has to go back to the incoming limit, not to the full size
       of the buffer, otherwise the limit would only be enforced for the
       first batch and the room kept for EDNS / ECS could be eaten by the
       query itself */
    for (size_t idx = 0; idx < vectSize; idx++) {
      recvData[idx].iov.iov_base = recvData[idx].packet;
      recvData[idx].iov.iov_len = s_udpIncomingBufferSize;
    }

    /* block until we have at least one message ready, but return
       as many as possible to save the syscall costs */
    int msgsGot = recvmmsg(cs->udpFD, msgVec.get(), vectSize, MSG_WAITFORONE | MSG_TRUNC, nullptr);

    if (msgsGot <= 0) {
      vinfolog("Getting UDP messages via recvmmsg() failed with: %s", strerror(errno));
      continue;
    }

    unsigned int msgsToSend = 0;

    /* process the received messages */
    for (int msgIdx = 0; msgIdx < msgsGot; msgIdx++) {
      const struct msghdr* msgh = &msgVec[msgIdx].msg_hdr;
      unsigned int got = msgVec[msgIdx].msg_len;
      const ComboAddress& remote = recvData[msgIdx].remote;

      if (got < sizeof(struct dnsheader)) {
        g_stats.nonCompliantQueries++;
        continue;
      }

      /* the receive-side iov and cbuf are reused for the queued response, if any */
      processUDPQuery(*cs, holders, msgh, remote, recvData[msgIdx].dest, recvData[msgIdx].packet, static_cast<uint16_t>(got), sizeof(recvData[msgIdx].packet), outMsgVec.get(), &msgsToSend, &recvData[msgIdx].iov, recvData[msgIdx].cbuf);

    }

    /* immediate (not delayed or sent to a backend) responses (mostly from a rule, dynamic block
       or the cache) can be sent in batch too */

    if (msgsToSend > 0 && msgsToSend <= static_cast<unsigned int>(msgsGot)) {
      int sent = sendmmsg(cs->udpFD, outMsgVec.get(), msgsToSend, 0);

      if (sent < 0 || static_cast<unsigned int>(sent) != msgsToSend) {
        vinfolog("Error sending responses with sendmmsg() (%d on %u): %s", sent, msgsToSend, strerror(errno));
      }
    }

  }
}
#endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */
1501 | ||
1502 | // listens to incoming queries, sends out to downstream servers, noting the intended return path | |
1503 | static void* udpClientThread(ClientState* cs) | |
1504 | try | |
1505 | { | |
1506 | LocalHolders holders; | |
1507 | ||
1508 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) | |
1509 | if (g_udpVectorSize > 1) { | |
1510 | MultipleMessagesUDPClientThread(cs, holders); | |
1511 | ||
1512 | } | |
1513 | else | |
1514 | #endif /* defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) */ | |
1515 | { | |
1516 | char packet[4096]; | |
1517 | /* the actual buffer is larger because: | |
1518 | - we may have to add EDNS and/or ECS | |
1519 | - we use it for self-generated responses (from rule or cache) | |
1520 | but we only accept incoming payloads up to that size | |
1521 | */ | |
1522 | static_assert(s_udpIncomingBufferSize <= sizeof(packet), "the incoming buffer size should not be larger than sizeof(MMReceiver::packet)"); | |
1523 | struct msghdr msgh; | |
1524 | struct iovec iov; | |
1525 | /* used by HarvestDestinationAddress */ | |
1526 | char cbuf[256]; | |
1527 | ||
1528 | ComboAddress remote; | |
1529 | ComboAddress dest; | |
1530 | remote.sin4.sin_family = cs->local.sin4.sin_family; | |
1531 | fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), packet, sizeof(packet), &remote); | |
1532 | ||
1533 | for(;;) { | |
1534 | ssize_t got = recvmsg(cs->udpFD, &msgh, 0); | |
1535 | ||
1536 | if (got < 0 || static_cast<size_t>(got) < sizeof(struct dnsheader)) { | |
1537 | g_stats.nonCompliantQueries++; | |
1538 | continue; | |
1539 | } | |
1540 | ||
1541 | processUDPQuery(*cs, holders, &msgh, remote, dest, packet, static_cast<uint16_t>(got), s_udpIncomingBufferSize, nullptr, nullptr, nullptr, nullptr); | |
1542 | } | |
1543 | } | |
1544 | ||
1545 | return nullptr; | |
24d5cb00 | 1546 | } |
2b3eefc3 | 1547 | catch(const std::exception &e) |
a4652d55 | 1548 | { |
1549 | errlog("UDP client thread died because of exception: %s", e.what()); | |
0beaa5c8 | 1550 | return nullptr; |
a4652d55 | 1551 | } |
2b3eefc3 | 1552 | catch(const PDNSException &e) |
a4652d55 | 1553 | { |
1554 | errlog("UDP client thread died because of PowerDNS exception: %s", e.reason); | |
0beaa5c8 | 1555 | return nullptr; |
a4652d55 | 1556 | } |
1557 | catch(...) | |
1558 | { | |
1559 | errlog("UDP client thread died because of an exception: %s", "unknown"); | |
0beaa5c8 | 1560 | return nullptr; |
a4652d55 | 1561 | } |
24d5cb00 | 1562 | |
fbe2a2e0 | 1563 | static bool upCheck(DownstreamState& ds) |
773470ca | 1564 | try |
1565 | { | |
1566 | vector<uint8_t> packet; | |
de9f7157 | 1567 | DNSPacketWriter dpw(packet, ds.checkName, ds.checkType.getCode(), ds.checkClass); |
fc01e0b1 RG |
1568 | dnsheader * requestHeader = dpw.getHeader(); |
1569 | requestHeader->rd=true; | |
21830638 RG |
1570 | if (ds.setCD) { |
1571 | requestHeader->cd = true; | |
1572 | } | |
773470ca | 1573 | |
fbe2a2e0 | 1574 | Socket sock(ds.remote.sin4.sin_family, SOCK_DGRAM); |
773470ca | 1575 | sock.setNonBlocking(); |
fbe2a2e0 RG |
1576 | if (!IsAnyAddress(ds.sourceAddr)) { |
1577 | sock.setReuseAddr(); | |
1578 | sock.bind(ds.sourceAddr); | |
1579 | } | |
1580 | sock.connect(ds.remote); | |
1581 | ssize_t sent = udpClientSendRequestToBackend(&ds, sock.getHandle(), (char*)&packet[0], packet.size()); | |
9e87dcb8 RG |
1582 | if (sent < 0) { |
1583 | int ret = errno; | |
1584 | if (g_verboseHealthChecks) | |
fd23d9bb | 1585 | infolog("Error while sending a health check query to backend %s: %d", ds.getNameWithAddr(), ret); |
fbe2a2e0 | 1586 | return false; |
9e87dcb8 | 1587 | } |
fbe2a2e0 | 1588 | |
773470ca | 1589 | int ret=waitForRWData(sock.getHandle(), true, 1, 0); |
9e87dcb8 RG |
1590 | if(ret < 0 || !ret) { // error, timeout, both are down! |
1591 | if (ret < 0) { | |
1592 | ret = errno; | |
1593 | if (g_verboseHealthChecks) | |
fd23d9bb | 1594 | infolog("Error while waiting for the health check response from backend %s: %d", ds.getNameWithAddr(), ret); |
9e87dcb8 RG |
1595 | } |
1596 | else { | |
1597 | if (g_verboseHealthChecks) | |
fd23d9bb | 1598 | infolog("Timeout while waiting for the health check response from backend %s", ds.getNameWithAddr()); |
9e87dcb8 | 1599 | } |
773470ca | 1600 | return false; |
9e87dcb8 RG |
1601 | } |
1602 | ||
773470ca | 1603 | string reply; |
fbe2a2e0 | 1604 | sock.recvFrom(reply, ds.remote); |
773470ca | 1605 | |
231d3b6b | 1606 | const dnsheader * responseHeader = (const dnsheader *) reply.c_str(); |
fc01e0b1 | 1607 | |
9e87dcb8 RG |
1608 | if (reply.size() < sizeof(*responseHeader)) { |
1609 | if (g_verboseHealthChecks) | |
fd23d9bb | 1610 | infolog("Invalid health check response of size %d from backend %s, expecting at least %d", reply.size(), ds.getNameWithAddr(), sizeof(*responseHeader)); |
fc01e0b1 | 1611 | return false; |
9e87dcb8 | 1612 | } |
fc01e0b1 | 1613 | |
9e87dcb8 RG |
1614 | if (responseHeader->id != requestHeader->id) { |
1615 | if (g_verboseHealthChecks) | |
fd23d9bb | 1616 | infolog("Invalid health check response id %d from backend %s, expecting %d", responseHeader->id, ds.getNameWithAddr(), requestHeader->id); |
fc01e0b1 | 1617 | return false; |
9e87dcb8 RG |
1618 | } |
1619 | ||
1620 | if (!responseHeader->qr) { | |
1621 | if (g_verboseHealthChecks) | |
fd23d9bb | 1622 | infolog("Invalid health check response from backend %s, expecting QR to be set", ds.getNameWithAddr()); |
fc01e0b1 | 1623 | return false; |
9e87dcb8 RG |
1624 | } |
1625 | ||
1626 | if (responseHeader->rcode == RCode::ServFail) { | |
1627 | if (g_verboseHealthChecks) | |
fd23d9bb | 1628 | infolog("Backend %s responded to health check with ServFail", ds.getNameWithAddr()); |
fc01e0b1 | 1629 | return false; |
9e87dcb8 RG |
1630 | } |
1631 | ||
1632 | if (ds.mustResolve && (responseHeader->rcode == RCode::NXDomain || responseHeader->rcode == RCode::Refused)) { | |
1633 | if (g_verboseHealthChecks) | |
fd23d9bb | 1634 | infolog("Backend %s responded to health check with %s while mustResolve is set", ds.getNameWithAddr(), responseHeader->rcode == RCode::NXDomain ? "NXDomain" : "Refused"); |
a6e02424 | 1635 | return false; |
9e87dcb8 | 1636 | } |
fc01e0b1 | 1637 | |
773470ca | 1638 | // XXX fixme do bunch of checking here etc |
1639 | return true; | |
1640 | } | |
9e87dcb8 RG |
1641 | catch(const std::exception& e) |
1642 | { | |
1643 | if (g_verboseHealthChecks) | |
fd23d9bb | 1644 | infolog("Error checking the health of backend %s: %s", ds.getNameWithAddr(), e.what()); |
9e87dcb8 RG |
1645 | return false; |
1646 | } | |
773470ca | 1647 | catch(...) |
1648 | { | |
9e87dcb8 | 1649 | if (g_verboseHealthChecks) |
fd23d9bb | 1650 | infolog("Unknown exception while checking the health of backend %s", ds.getNameWithAddr()); |
773470ca | 1651 | return false; |
1652 | } | |
726ddf60 | 1653 | |
6c1ca990 | 1654 | uint64_t g_maxTCPClientThreads{10}; |
886e2cf2 | 1655 | std::atomic<uint16_t> g_cacheCleaningDelay{60}; |
f65ea0c2 | 1656 | std::atomic<uint16_t> g_cacheCleaningPercentage{100}; |
e41f8165 | 1657 | |
3c115e0f | 1658 | void* maintThread() |
886e2cf2 RG |
1659 | { |
1660 | int interval = 1; | |
1661 | size_t counter = 0; | |
5f3ea719 | 1662 | int32_t secondsToWaitLog = 0; |
886e2cf2 RG |
1663 | |
1664 | for(;;) { | |
1665 | sleep(interval); | |
1666 | ||
069e59db | 1667 | { |
1668 | std::lock_guard<std::mutex> lock(g_luamutex); | |
5f3ea719 PL |
1669 | auto f = g_lua.readVariable<boost::optional<std::function<void()> > >("maintenance"); |
1670 | if(f) { | |
1671 | try { | |
1672 | (*f)(); | |
1673 | secondsToWaitLog = 0; | |
1674 | } | |
1675 | catch(std::exception &e) { | |
1676 | if (secondsToWaitLog <= 0) { | |
1677 | infolog("Error during execution of maintenance function: %s", e.what()); | |
1678 | secondsToWaitLog = 61; | |
1679 | } | |
1680 | secondsToWaitLog -= interval; | |
1681 | } | |
1682 | } | |
069e59db | 1683 | } |
886e2cf2 RG |
1684 | |
1685 | counter++; | |
1686 | if (counter >= g_cacheCleaningDelay) { | |
1687 | const auto localPools = g_pools.getCopy(); | |
e6557ec8 | 1688 | std::shared_ptr<DNSDistPacketCache> packetCache = nullptr; |
886e2cf2 | 1689 | for (const auto& entry : localPools) { |
e6557ec8 RG |
1690 | { |
1691 | std::lock_guard<std::mutex> lock(g_luamutex); | |
1692 | packetCache = entry.second->packetCache; | |
1693 | } | |
1694 | if (packetCache) { | |
f65ea0c2 RG |
1695 | size_t upTo = (packetCache->getMaxEntries()* (100 - g_cacheCleaningPercentage)) / 100; |
1696 | packetCache->purgeExpired(upTo); | |
886e2cf2 RG |
1697 | } |
1698 | } | |
1699 | counter = 0; | |
1700 | } | |
1701 | ||
1702 | // ponder pruning g_dynblocks of expired entries here | |
1703 | } | |
1704 | return 0; | |
1705 | } | |
1706 | ||
1707 | void* healthChecksThread() | |
3ae86514 | 1708 | { |
fcadd56e | 1709 | int interval = 1; |
64e4ebb4 | 1710 | |
3ae86514 | 1711 | for(;;) { |
1712 | sleep(interval); | |
773470ca | 1713 | |
ded1985a | 1714 | if(g_tcpclientthreads->getQueuedCount() > 1 && !g_tcpclientthreads->hasReachedMaxThreads()) |
a9bf3ec4 | 1715 | g_tcpclientthreads->addTCPClientThread(); |
3ae86514 | 1716 | |
e5a14b2b | 1717 | for(auto& dss : g_dstates.getCopy()) { // this points to the actual shared_ptrs! |
773470ca | 1718 | if(dss->availability==DownstreamState::Availability::Auto) { |
2993d58d | 1719 | bool newState=upCheck(*dss); |
1720 | if (newState) { | |
1721 | if (dss->currentCheckFailures != 0) { | |
1722 | dss->currentCheckFailures = 0; | |
1723 | } | |
1724 | } | |
1725 | else if (!newState && dss->upStatus) { | |
1726 | dss->currentCheckFailures++; | |
1727 | if (dss->currentCheckFailures < dss->maxCheckFailures) { | |
1728 | newState = true; | |
1729 | } | |
1730 | } | |
1731 | ||
1732 | if(newState != dss->upStatus) { | |
1733 | warnlog("Marking downstream %s as '%s'", dss->getNameWithAddr(), newState ? "up" : "down"); | |
7565f4e6 RG |
1734 | |
1735 | if (newState && !dss->connected) { | |
1736 | try { | |
1737 | SConnect(dss->fd, dss->remote); | |
1738 | dss->connected = true; | |
2717a92f | 1739 | dss->tid = thread(responderThread, dss); |
7565f4e6 RG |
1740 | } |
1741 | catch(const std::runtime_error& error) { | |
1742 | infolog("Error connecting to new server with address %s: %s", dss->remote.toStringWithPort(), error.what()); | |
b58f08e5 RG |
1743 | newState = false; |
1744 | dss->connected = false; | |
7565f4e6 RG |
1745 | } |
1746 | } | |
1747 | ||
2993d58d | 1748 | dss->upStatus = newState; |
1749 | dss->currentCheckFailures = 0; | |
9f4eb5cc RG |
1750 | if (g_snmpAgent && g_snmpTrapsEnabled) { |
1751 | g_snmpAgent->sendBackendStatusChangeTrap(dss); | |
1752 | } | |
2993d58d | 1753 | } |
773470ca | 1754 | } |
b076b34a | 1755 | |
773470ca | 1756 | auto delta = dss->sw.udiffAndSet()/1000000.0; |
1757 | dss->queryLoad = 1.0*(dss->queries.load() - dss->prev.queries.load())/delta; | |
1758 | dss->dropRate = 1.0*(dss->reuseds.load() - dss->prev.reuseds.load())/delta; | |
3c115e0f | 1759 | dss->prev.queries.store(dss->queries.load()); |
773470ca | 1760 | dss->prev.reuseds.store(dss->reuseds.load()); |
3c115e0f | 1761 | |
773470ca | 1762 | for(IDState& ids : dss->idStates) { // timeouts |
e0b5e49d | 1763 | if(ids.origFD >=0 && ids.age++ > g_udpTimeout) { |
51642fe3 RG |
1764 | /* We set origFD to -1 as soon as possible |
1765 | to limit the risk of racing with the | |
1766 | responder thread. | |
1767 | The UDP client thread only checks origFD to | |
1768 | know whether outstanding has to be incremented, | |
1769 | so the sooner the better any way since we _will_ | |
1770 | decrement it. | |
1771 | */ | |
1772 | ids.origFD = -1; | |
64e4ebb4 | 1773 | ids.age = 0; |
3c115e0f | 1774 | dss->reuseds++; |
1775 | --dss->outstanding; | |
51642fe3 | 1776 | g_stats.downstreamTimeouts++; // this is an 'actively' discovered timeout |
c08a5092 | 1777 | vinfolog("Had a downstream timeout from %s (%s) for query for %s|%s from %s", |
1778 | dss->remote.toStringWithPort(), dss->name, | |
1779 | ids.qname.toString(), QType(ids.qtype).getName(), ids.origRemote.toStringWithPort()); | |
51642fe3 | 1780 | |
c2fbeb27 | 1781 | struct timespec ts; |
85c7ca75 | 1782 | gettime(&ts); |
c2fbeb27 | 1783 | |
2d11d1b2 | 1784 | struct dnsheader fake; |
1785 | memset(&fake, 0, sizeof(fake)); | |
1786 | fake.id = ids.origID; | |
c2fbeb27 | 1787 | |
1788 | std::lock_guard<std::mutex> lock(g_rings.respMutex); | |
1789 | g_rings.respRing.push_back({ts, ids.origRemote, ids.qname, ids.qtype, std::numeric_limits<unsigned int>::max(), 0, fake, dss->remote}); | |
232f0877 | 1790 | } |
3ae86514 | 1791 | } |
1792 | } | |
3ae86514 | 1793 | } |
1794 | return 0; | |
1795 | } | |
1796 | ||
8c732ebf | 1797 | string g_key; |
3ae86514 | 1798 | |
6d01c80c | 1799 | |
1800 | void controlThread(int fd, ComboAddress local) | |
1801 | try | |
1802 | { | |
1803 | ComboAddress client; | |
1804 | int sock; | |
1805 | warnlog("Accepting control connections on %s", local.toStringWithPort()); | |
1806 | while((sock=SAccept(fd, client)) >= 0) { | |
506bb661 RG |
1807 | if (g_logConsoleConnections) { |
1808 | warnlog("Got control connection from %s", client.toStringWithPort()); | |
1809 | } | |
1810 | ||
6d01c80c | 1811 | thread t(controlClientThread, sock, client); |
1812 | t.detach(); | |
1813 | } | |
1814 | } | |
1815 | catch(std::exception& e) | |
1816 | { | |
1817 | close(fd); | |
1818 | errlog("Control connection died: %s", e.what()); | |
1819 | } | |
3f25bce6 | 1820 | |
b076b34a | 1821 | |
3c115e0f | 1822 | |
c2a42f97 | 1823 | static void bindAny(int af, int sock) |
1824 | { | |
18f8e493 | 1825 | __attribute__((unused)) int one = 1; |
c2a42f97 | 1826 | |
1827 | #ifdef IP_FREEBIND | |
1828 | if (setsockopt(sock, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) < 0) | |
1829 | warnlog("Warning: IP_FREEBIND setsockopt failed: %s", strerror(errno)); | |
1830 | #endif | |
1831 | ||
1832 | #ifdef IP_BINDANY | |
1833 | if (af == AF_INET) | |
1834 | if (setsockopt(sock, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) < 0) | |
1835 | warnlog("Warning: IP_BINDANY setsockopt failed: %s", strerror(errno)); | |
1836 | #endif | |
1837 | #ifdef IPV6_BINDANY | |
1838 | if (af == AF_INET6) | |
1839 | if (setsockopt(sock, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) < 0) | |
1840 | warnlog("Warning: IPV6_BINDANY setsockopt failed: %s", strerror(errno)); | |
1841 | #endif | |
1842 | #ifdef SO_BINDANY | |
1843 | if (setsockopt(sock, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) < 0) | |
1844 | warnlog("Warning: SO_BINDANY setsockopt failed: %s", strerror(errno)); | |
1845 | #endif | |
1846 | } | |
1847 | ||
a36ce055 RG |
1848 | static void dropGroupPrivs(gid_t gid) |
1849 | { | |
1850 | if (gid) { | |
1851 | if (setgid(gid) == 0) { | |
1852 | if (setgroups(0, NULL) < 0) { | |
1853 | warnlog("Warning: Unable to drop supplementary gids: %s", strerror(errno)); | |
1854 | } | |
1855 | } | |
1856 | else { | |
1857 | warnlog("Warning: Unable to set group ID to %d: %s", gid, strerror(errno)); | |
1858 | } | |
1859 | } | |
1860 | } | |
1861 | ||
1862 | static void dropUserPrivs(uid_t uid) | |
1863 | { | |
1864 | if(uid) { | |
1865 | if(setuid(uid) < 0) { | |
1866 | warnlog("Warning: Unable to set user ID to %d: %s", uid, strerror(errno)); | |
1867 | } | |
1868 | } | |
1869 | } | |
1870 | ||
41408d3a RG |
1871 | static void checkFileDescriptorsLimits(size_t udpBindsCount, size_t tcpBindsCount) |
1872 | { | |
1873 | /* stdin, stdout, stderr */ | |
1874 | size_t requiredFDsCount = 3; | |
1875 | size_t backendsCount = g_dstates.getCopy().size(); | |
9fcd6adb | 1876 | /* listening sockets */ |
41408d3a RG |
1877 | requiredFDsCount += udpBindsCount; |
1878 | requiredFDsCount += tcpBindsCount; | |
1879 | /* max TCP connections currently served */ | |
1880 | requiredFDsCount += g_maxTCPClientThreads; | |
f2e29d04 | 1881 | /* max pipes for communicating between TCP acceptors and client threads */ |
41408d3a RG |
1882 | requiredFDsCount += (g_maxTCPClientThreads * 2); |
1883 | /* UDP sockets to backends */ | |
1884 | requiredFDsCount += backendsCount; | |
1885 | /* TCP sockets to backends */ | |
1886 | requiredFDsCount += (backendsCount * g_maxTCPClientThreads); | |
1887 | /* max TCP queued connections */ | |
9fcd6adb | 1888 | requiredFDsCount += g_maxTCPQueuedConnections; |
41408d3a RG |
1889 | /* DelayPipe pipe */ |
1890 | requiredFDsCount += 2; | |
1891 | /* syslog socket */ | |
1892 | requiredFDsCount++; | |
1893 | /* webserver main socket */ | |
1894 | requiredFDsCount++; | |
1895 | /* console main socket */ | |
1896 | requiredFDsCount++; | |
1897 | /* carbon export */ | |
1898 | requiredFDsCount++; | |
1899 | /* history file */ | |
1900 | requiredFDsCount++; | |
1901 | struct rlimit rl; | |
1902 | getrlimit(RLIMIT_NOFILE, &rl); | |
9fcd6adb | 1903 | if (rl.rlim_cur <= requiredFDsCount) { |
41408d3a RG |
1904 | warnlog("Warning, this configuration can use more than %d file descriptors, web server and console connections not included, and the current limit is %d.", std::to_string(requiredFDsCount), std::to_string(rl.rlim_cur)); |
1905 | #ifdef HAVE_SYSTEMD | |
1906 | warnlog("You can increase this value by using LimitNOFILE= in the systemd unit file or ulimit."); | |
1907 | #else | |
1908 | warnlog("You can increase this value by using ulimit."); | |
1909 | #endif | |
1910 | } | |
1911 | } | |
c2a42f97 | 1912 | |
/* Settings gathered from the command line by main() */
struct
{
  std::vector<std::string> locals;   // addresses to listen on (-l,--local)
  std::vector<std::string> remotes;  // positional arguments, when not running as a client
  bool checkConfig = false;          // --check-config
  bool beDaemon = false;             // -d,--daemon
  bool beClient = false;             // -c,--client
  bool beSupervised = false;         // --supervised
  std::string pidfile;               // -p,--pidfile
  std::string command;               // -e,--execute
  std::string config;                // -C,--config
  std::string uid;                   // -u,--uid
  std::string gid;                   // -g,--gid
} g_cmdLine;

/* set to true by main() once the configuration has been loaded */
std::atomic<bool> g_configurationDone{false};
520eb5a0 | 1929 | |
24d5cb00 | 1930 | int main(int argc, char** argv) |
1931 | try | |
1932 | { | |
41408d3a RG |
1933 | size_t udpBindsCount = 0; |
1934 | size_t tcpBindsCount = 0; | |
94721140 | 1935 | rl_attempted_completion_function = my_completion; |
1936 | rl_completion_append_character = 0; | |
1937 | ||
726ddf60 | 1938 | signal(SIGPIPE, SIG_IGN); |
6d01c80c | 1939 | signal(SIGCHLD, SIG_IGN); |
b076b34a | 1940 | openlog("dnsdist", LOG_PID, LOG_DAEMON); |
a4652d55 | 1941 | g_console=true; |
6d01c80c | 1942 | |
0b62ec78 | 1943 | #ifdef HAVE_LIBSODIUM |
6d01c80c | 1944 | if (sodium_init() == -1) { |
1945 | cerr<<"Unable to initialize crypto library"<<endl; | |
1946 | exit(EXIT_FAILURE); | |
1947 | } | |
7691e7df | 1948 | g_hashperturb=randombytes_uniform(0xffffffff); |
1949 | srandom(randombytes_uniform(0xffffffff)); | |
1950 | #else | |
1951 | { | |
1952 | struct timeval tv; | |
1953 | gettimeofday(&tv, 0); | |
1954 | srandom(tv.tv_sec ^ tv.tv_usec ^ getpid()); | |
1955 | g_hashperturb=random(); | |
1956 | } | |
1957 | ||
0b62ec78 | 1958 | #endif |
094b6aff | 1959 | ComboAddress clientAddress = ComboAddress(); |
11058bd6 | 1960 | g_cmdLine.config=SYSCONFDIR "/dnsdist.conf"; |
7cc68f53 | 1961 | struct option longopts[]={ |
8f2d5ec3 | 1962 | {"acl", required_argument, 0, 'a'}, |
7cc68f53 | 1963 | {"config", required_argument, 0, 'C'}, |
5efcfa63 | 1964 | {"check-config", 0, 0, 1}, |
7cc68f53 | 1965 | {"execute", required_argument, 0, 'e'}, |
4fdb62a4 | 1966 | {"client", 0, 0, 'c'}, |
a36ce055 | 1967 | {"gid", required_argument, 0, 'g'}, |
ddb14ec9 PL |
1968 | #ifdef HAVE_LIBSODIUM |
1969 | {"setkey", required_argument, 0, 'k'}, | |
1970 | #endif | |
7cc68f53 | 1971 | {"local", required_argument, 0, 'l'}, |
98dd1cda | 1972 | {"daemon", 0, 0, 'd'}, |
c7b1d943 | 1973 | {"pidfile", required_argument, 0, 'p'}, |
505ca3d1 | 1974 | {"supervised", 0, 0, 's'}, |
bbfaaa6f | 1975 | {"disable-syslog", 0, 0, 2}, |
a36ce055 | 1976 | {"uid", required_argument, 0, 'u'}, |
359d7ca4 | 1977 | {"verbose", 0, 0, 'v'}, |
6306c282 | 1978 | {"version", 0, 0, 'V'}, |
c7b1d943 | 1979 | {"help", 0, 0, 'h'}, |
7cc68f53 | 1980 | {0,0,0,0} |
1981 | }; | |
1982 | int longindex=0; | |
8f2d5ec3 | 1983 | string optstring; |
7cc68f53 | 1984 | for(;;) { |
ddb14ec9 PL |
1985 | #ifdef HAVE_LIBSODIUM |
1986 | int c=getopt_long(argc, argv, "a:hcde:C:k:l:vp:g:u:V", longopts, &longindex); | |
1987 | #else | |
6306c282 | 1988 | int c=getopt_long(argc, argv, "a:hcde:C:l:vp:g:u:V", longopts, &longindex); |
ddb14ec9 | 1989 | #endif |
7cc68f53 | 1990 | if(c==-1) |
1991 | break; | |
1992 | switch(c) { | |
5efcfa63 PL |
1993 | case 1: |
1994 | g_cmdLine.checkConfig=true; | |
1995 | break; | |
bbfaaa6f PL |
1996 | case 2: |
1997 | g_syslog=false; | |
1998 | break; | |
7cc68f53 | 1999 | case 'C': |
2000 | g_cmdLine.config=optarg; | |
2001 | break; | |
2002 | case 'c': | |
2003 | g_cmdLine.beClient=true; | |
2004 | break; | |
2005 | case 'd': | |
98dd1cda | 2006 | g_cmdLine.beDaemon=true; |
7cc68f53 | 2007 | break; |
2008 | case 'e': | |
2009 | g_cmdLine.command=optarg; | |
2010 | break; | |
a36ce055 RG |
2011 | case 'g': |
2012 | g_cmdLine.gid=optarg; | |
2013 | break; | |
7cc68f53 | 2014 | case 'h': |
6306c282 PL |
2015 | cout<<"dnsdist "<<VERSION<<endl; |
2016 | cout<<endl; | |
094b6aff | 2017 | cout<<"Syntax: dnsdist [-C,--config file] [-c,--client [IP[:PORT]]] [-d,--daemon]\n"; |
c7b1d943 | 2018 | cout<<"[-p,--pidfile file] [-e,--execute cmd] [-h,--help] [-l,--local addr]\n"; |
c68aae2e | 2019 | cout<<"[-v,--verbose] [--check-config]\n"; |
7cc68f53 | 2020 | cout<<"\n"; |
8f2d5ec3 | 2021 | cout<<"-a,--acl netmask Add this netmask to the ACL\n"; |
7cc68f53 | 2022 | cout<<"-C,--config file Load configuration from 'file'\n"; |
094b6aff PL |
2023 | cout<<"-c,--client Operate as a client, connect to dnsdist. This reads\n"; |
2024 | cout<<" controlSocket from your configuration file, but also\n"; | |
2025 | cout<<" accepts an IP:PORT argument\n"; | |
ddb14ec9 PL |
2026 | #ifdef HAVE_LIBSODIUM |
2027 | cout<<"-k,--setkey KEY Use KEY for encrypted communication to dnsdist. This\n"; | |
2028 | cout<<" is similar to setting setKey in the configuration file.\n"; | |
2029 | cout<<" NOTE: this will leak this key in your shell's history!\n"; | |
2030 | #endif | |
c68aae2e PL |
2031 | cout<<"--check-config Validate the configuration file and exit. The exit-code\n"; |
2032 | cout<<" reflects the validation, 0 is OK, 1 means an error.\n"; | |
2033 | cout<<" Any errors are printed as well.\n"; | |
98dd1cda | 2034 | cout<<"-d,--daemon Operate as a daemon\n"; |
7cc68f53 | 2035 | cout<<"-e,--execute cmd Connect to dnsdist and execute 'cmd'\n"; |
a36ce055 | 2036 | cout<<"-g,--gid gid Change the process group ID after binding sockets\n"; |
7cc68f53 | 2037 | cout<<"-h,--help Display this helpful message\n"; |
2038 | cout<<"-l,--local address Listen on this local address\n"; | |
505ca3d1 PL |
2039 | cout<<"--supervised Don't open a console, I'm supervised\n"; |
2040 | cout<<" (use with e.g. systemd and daemontools)\n"; | |
bbfaaa6f PL |
2041 | cout<<"--disable-syslog Don't log to syslog, only to stdout\n"; |
2042 | cout<<" (use with e.g. systemd)\n"; | |
c7b1d943 | 2043 | cout<<"-p,--pidfile file Write a pidfile, works only with --daemon\n"; |
a36ce055 | 2044 | cout<<"-u,--uid uid Change the process user ID after binding sockets\n"; |
359d7ca4 | 2045 | cout<<"-v,--verbose Enable verbose mode\n"; |
7cc68f53 | 2046 | cout<<"\n"; |
2047 | exit(EXIT_SUCCESS); | |
2048 | break; | |
8f2d5ec3 | 2049 | case 'a': |
2050 | optstring=optarg; | |
2051 | g_ACL.modify([optstring](NetmaskGroup& nmg) { nmg.addMask(optstring); }); | |
2052 | break; | |
ddb14ec9 PL |
2053 | #ifdef HAVE_LIBSODIUM |
2054 | case 'k': | |
6f6b4d69 | 2055 | if (B64Decode(string(optarg), g_key) < 0) { |
ddb14ec9 PL |
2056 | cerr<<"Unable to decode key '"<<optarg<<"'."<<endl; |
2057 | exit(EXIT_FAILURE); | |
2058 | } | |
2059 | break; | |
2060 | #endif | |
7cc68f53 | 2061 | case 'l': |
6a363878 | 2062 | g_cmdLine.locals.push_back(trim_copy(string(optarg))); |
7cc68f53 | 2063 | break; |
c7b1d943 PL |
2064 | case 'p': |
2065 | g_cmdLine.pidfile=optarg; | |
2cd28293 | 2066 | break; |
505ca3d1 PL |
2067 | case 's': |
2068 | g_cmdLine.beSupervised=true; | |
2cd28293 | 2069 | break; |
a36ce055 RG |
2070 | case 'u': |
2071 | g_cmdLine.uid=optarg; | |
2072 | break; | |
7cc68f53 | 2073 | case 'v': |
2074 | g_verbose=true; | |
2075 | break; | |
6306c282 | 2076 | case 'V': |
d4d796e5 PD |
2077 | #ifdef LUAJIT_VERSION |
2078 | cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<" ["<<LUAJIT_VERSION<<"])"<<endl; | |
2079 | #else | |
2080 | cout<<"dnsdist "<<VERSION<<" ("<<LUA_RELEASE<<")"<<endl; | |
2081 | #endif | |
70829d97 | 2082 | cout<<"Enabled features: "; |
a227f47d RG |
2083 | #ifdef HAVE_DNS_OVER_TLS |
2084 | cout<<"dns-over-tls("; | |
2085 | #ifdef HAVE_GNUTLS | |
2086 | cout<<"gnutls "; | |
2087 | #endif | |
2088 | #ifdef HAVE_LIBSSL | |
2089 | cout<<"openssl"; | |
2090 | #endif | |
2091 | cout<<") "; | |
2092 | #endif | |
70829d97 PL |
2093 | #ifdef HAVE_DNSCRYPT |
2094 | cout<<"dnscrypt "; | |
2095 | #endif | |
0beaa5c8 RG |
2096 | #ifdef HAVE_EBPF |
2097 | cout<<"ebpf "; | |
2098 | #endif | |
70829d97 PL |
2099 | #ifdef HAVE_LIBSODIUM |
2100 | cout<<"libsodium "; | |
2101 | #endif | |
2102 | #ifdef HAVE_PROTOBUF | |
2103 | cout<<"protobuf "; | |
2104 | #endif | |
2105 | #ifdef HAVE_RE2 | |
2106 | cout<<"re2 "; | |
2107 | #endif | |
0beaa5c8 RG |
2108 | #if defined(HAVE_RECVMMSG) && defined(HAVE_SENDMMSG) && defined(MSG_WAITFORONE) |
2109 | cout<<"recvmmsg/sendmmsg "; | |
2110 | #endif | |
2111 | #ifdef HAVE_NET_SNMP | |
2112 | cout<<"snmp "; | |
2113 | #endif | |
70829d97 PL |
2114 | #ifdef HAVE_SYSTEMD |
2115 | cout<<"systemd"; | |
2116 | #endif | |
2117 | cout<<endl; | |
6306c282 PL |
2118 | exit(EXIT_SUCCESS); |
2119 | break; | |
7cc68f53 | 2120 | } |
24d5cb00 | 2121 | } |
6ab65223 | 2122 | |
7cc68f53 | 2123 | argc-=optind; |
2124 | argv+=optind; | |
2125 | for(auto p = argv; *p; ++p) { | |
094b6aff PL |
2126 | if(g_cmdLine.beClient) { |
2127 | clientAddress = ComboAddress(*p, 5199); | |
2128 | } else { | |
2129 | g_cmdLine.remotes.push_back(*p); | |
2130 | } | |
7cc68f53 | 2131 | } |
2132 | ||
cd29dcb1 | 2133 | ServerPolicy leastOutstandingPol{"leastOutstanding", leastOutstanding}; |
2134 | ||
e5a14b2b | 2135 | g_policy.setState(leastOutstandingPol); |
7cc68f53 | 2136 | if(g_cmdLine.beClient || !g_cmdLine.command.empty()) { |
2137 | setupLua(true, g_cmdLine.config); | |
094b6aff PL |
2138 | if (clientAddress != ComboAddress()) |
2139 | g_serverControl = clientAddress; | |
7cc68f53 | 2140 | doClient(g_serverControl, g_cmdLine.command); |
e16fd59c | 2141 | _exit(EXIT_SUCCESS); |
6d01c80c | 2142 | } |
2e72cc0e | 2143 | |
8f133915 | 2144 | auto acl = g_ACL.getCopy(); |
8f2d5ec3 | 2145 | if(acl.empty()) { |
2146 | for(auto& addr : {"127.0.0.0/8", "10.0.0.0/8", "100.64.0.0/10", "169.254.0.0/16", "192.168.0.0/16", "172.16.0.0/12", "::1/128", "fc00::/7", "fe80::/10"}) | |
2147 | acl.addMask(addr); | |
2148 | g_ACL.setState(acl); | |
2149 | } | |
8f133915 | 2150 | |
5efcfa63 PL |
2151 | if (g_cmdLine.checkConfig) { |
2152 | setupLua(true, g_cmdLine.config); | |
2153 | // No exception was thrown | |
2154 | infolog("Configuration '%s' OK!", g_cmdLine.config); | |
d8c19b98 | 2155 | _exit(EXIT_SUCCESS); |
5efcfa63 PL |
2156 | } |
2157 | ||
7cc68f53 | 2158 | auto todo=setupLua(false, g_cmdLine.config); |
2e72cc0e | 2159 | |
7cc68f53 | 2160 | if(g_cmdLine.locals.size()) { |
2e72cc0e | 2161 | g_locals.clear(); |
7cc68f53 | 2162 | for(auto loc : g_cmdLine.locals) |
f0e4dcba | 2163 | g_locals.push_back(std::make_tuple(ComboAddress(loc, 53), true, false, 0, "", std::set<int>())); |
2e72cc0e | 2164 | } |
2165 | ||
2166 | if(g_locals.empty()) | |
f0e4dcba | 2167 | g_locals.push_back(std::make_tuple(ComboAddress("127.0.0.1", 53), true, false, 0, "", std::set<int>())); |
2e72cc0e | 2168 | |
e41f8165 RG |
2169 | g_configurationDone = true; |
2170 | ||
2e72cc0e | 2171 | vector<ClientState*> toLaunch; |
2172 | for(const auto& local : g_locals) { | |
2173 | ClientState* cs = new ClientState; | |
4c34246d | 2174 | cs->local= std::get<0>(local); |
2e72cc0e | 2175 | cs->udpFD = SSocket(cs->local.sin4.sin_family, SOCK_DGRAM, 0); |
2176 | if(cs->local.sin4.sin_family == AF_INET6) { | |
2177 | SSetsockopt(cs->udpFD, IPPROTO_IPV6, IPV6_V6ONLY, 1); | |
2178 | } | |
7cc68f53 | 2179 | //if(g_vm.count("bind-non-local")) |
4c34246d | 2180 | bindAny(cs->local.sin4.sin_family, cs->udpFD); |
549d63c9 | 2181 | |
915b0c39 AT |
2182 | // if (!setSocketTimestamps(cs->udpFD)) |
2183 | // L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl; | |
2184 | ||
549d63c9 | 2185 | |
4c34246d | 2186 | if(IsAnyAddress(cs->local)) { |
549d63c9 | 2187 | int one=1; |
2188 | setsockopt(cs->udpFD, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems | |
d98d3951 | 2189 | #ifdef IPV6_RECVPKTINFO |
11e1e08b | 2190 | setsockopt(cs->udpFD, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); |
d98d3951 | 2191 | #endif |
549d63c9 | 2192 | } |
9f67b883 | 2193 | |
4c34246d | 2194 | if (std::get<2>(local)) { |
5f084036 | 2195 | #ifdef SO_REUSEPORT |
4c34246d | 2196 | SSetsockopt(cs->udpFD, SOL_SOCKET, SO_REUSEPORT, 1); |
5f084036 RG |
2197 | #else |
2198 | warnlog("SO_REUSEPORT has been configured on local address '%s' but is not supported", std::get<0>(local).toStringWithPort()); | |
4c34246d | 2199 | #endif |
5f084036 | 2200 | } |
549d63c9 | 2201 | |
9f67b883 RG |
2202 | const std::string& itf = std::get<4>(local); |
2203 | if (!itf.empty()) { | |
2204 | #ifdef SO_BINDTODEVICE | |
2205 | int res = setsockopt(cs->udpFD, SOL_SOCKET, SO_BINDTODEVICE, itf.c_str(), itf.length()); | |
2206 | if (res != 0) { | |
2207 | warnlog("Error setting up the interface on local address '%s': %s", std::get<0>(local).toStringWithPort(), strerror(errno)); | |
2208 | } | |
2209 | #else | |
2210 | warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", std::get<0>(local).toStringWithPort()); | |
2211 | #endif | |
2212 | } | |
2213 | ||
87b515ed RG |
2214 | #ifdef HAVE_EBPF |
2215 | if (g_defaultBPFFilter) { | |
8429ad04 | 2216 | cs->attachFilter(g_defaultBPFFilter); |
87b515ed RG |
2217 | vinfolog("Attaching default BPF Filter to UDP frontend %s", cs->local.toStringWithPort()); |
2218 | } | |
2219 | #endif /* HAVE_EBPF */ | |
2220 | ||
f0e4dcba RG |
2221 | cs->cpus = std::get<5>(local); |
2222 | ||
11e1e08b | 2223 | SBind(cs->udpFD, cs->local); |
2e72cc0e | 2224 | toLaunch.push_back(cs); |
963bef8d | 2225 | g_frontends.push_back(cs); |
41408d3a | 2226 | udpBindsCount++; |
2e72cc0e | 2227 | } |
2228 | ||
a36ce055 | 2229 | for(const auto& local : g_locals) { |
4c34246d RG |
2230 | if(!std::get<1>(local)) { // no TCP/IP |
2231 | warnlog("Not providing TCP/IP service on local address '%s'", std::get<0>(local).toStringWithPort()); | |
a36ce055 RG |
2232 | continue; |
2233 | } | |
963bef8d | 2234 | ClientState* cs = new ClientState; |
4c34246d | 2235 | cs->local= std::get<0>(local); |
a36ce055 RG |
2236 | |
2237 | cs->tcpFD = SSocket(cs->local.sin4.sin_family, SOCK_STREAM, 0); | |
2238 | ||
2239 | SSetsockopt(cs->tcpFD, SOL_SOCKET, SO_REUSEADDR, 1); | |
2240 | #ifdef TCP_DEFER_ACCEPT | |
2241 | SSetsockopt(cs->tcpFD, SOL_TCP,TCP_DEFER_ACCEPT, 1); | |
2242 | #endif | |
9e284f31 | 2243 | if (std::get<3>(local) > 0) { |
5f084036 | 2244 | #ifdef TCP_FASTOPEN |
1654ece3 | 2245 | SSetsockopt(cs->tcpFD, IPPROTO_TCP, TCP_FASTOPEN, std::get<3>(local)); |
5f084036 RG |
2246 | #else |
2247 | warnlog("TCP Fast Open has been configured on local address '%s' but is not supported", std::get<0>(local).toStringWithPort()); | |
a36ce055 | 2248 | #endif |
5f084036 | 2249 | } |
a36ce055 RG |
2250 | if(cs->local.sin4.sin_family == AF_INET6) { |
2251 | SSetsockopt(cs->tcpFD, IPPROTO_IPV6, IPV6_V6ONLY, 1); | |
2252 | } | |
4c34246d | 2253 | #ifdef SO_REUSEPORT |
5f084036 | 2254 | /* no need to warn again if configured but support is not available, we already did for UDP */ |
4c34246d RG |
2255 | if (std::get<2>(local)) { |
2256 | SSetsockopt(cs->tcpFD, SOL_SOCKET, SO_REUSEPORT, 1); | |
2257 | } | |
2258 | #endif | |
9f67b883 RG |
2259 | |
2260 | const std::string& itf = std::get<4>(local); | |
2261 | if (!itf.empty()) { | |
2262 | #ifdef SO_BINDTODEVICE | |
2263 | int res = setsockopt(cs->tcpFD, SOL_SOCKET, SO_BINDTODEVICE, itf.c_str(), itf.length()); | |
2264 | if (res != 0) { | |
2265 | warnlog("Error setting up the interface on local address '%s': %s", std::get<0>(local).toStringWithPort(), strerror(errno)); | |
2266 | } | |
2267 | #else | |
2268 | warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", std::get<0>(local).toStringWithPort()); | |
2269 | #endif | |
2270 | } | |
2271 | ||
87b515ed RG |
2272 | #ifdef HAVE_EBPF |
2273 | if (g_defaultBPFFilter) { | |
8429ad04 | 2274 | cs->attachFilter(g_defaultBPFFilter); |
87b515ed RG |
2275 | vinfolog("Attaching default BPF Filter to TCP frontend %s", cs->local.toStringWithPort()); |
2276 | } | |
2277 | #endif /* HAVE_EBPF */ | |
2278 | ||
a36ce055 RG |
2279 | // if(g_vm.count("bind-non-local")) |
2280 | bindAny(cs->local.sin4.sin_family, cs->tcpFD); | |
2281 | SBind(cs->tcpFD, cs->local); | |
2282 | SListen(cs->tcpFD, 64); | |
f0e4dcba | 2283 | warnlog("Listening on %s", cs->local.toStringWithPort()); |
a36ce055 RG |
2284 | |
2285 | toLaunch.push_back(cs); | |
963bef8d | 2286 | g_frontends.push_back(cs); |
41408d3a | 2287 | tcpBindsCount++; |
a36ce055 RG |
2288 | } |
2289 | ||
11e1e08b RG |
2290 | #ifdef HAVE_DNSCRYPT |
2291 | for(auto& dcLocal : g_dnsCryptLocals) { | |
2292 | ClientState* cs = new ClientState; | |
4c34246d RG |
2293 | cs->local = std::get<0>(dcLocal); |
2294 | cs->dnscryptCtx = &(std::get<1>(dcLocal)); | |
11e1e08b RG |
2295 | cs->udpFD = SSocket(cs->local.sin4.sin_family, SOCK_DGRAM, 0); |
2296 | if(cs->local.sin4.sin_family == AF_INET6) { | |
2297 | SSetsockopt(cs->udpFD, IPPROTO_IPV6, IPV6_V6ONLY, 1); | |
2298 | } | |
2299 | bindAny(cs->local.sin4.sin_family, cs->udpFD); | |
2300 | if(IsAnyAddress(cs->local)) { | |
2301 | int one=1; | |
2302 | setsockopt(cs->udpFD, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems | |
2303 | #ifdef IPV6_RECVPKTINFO | |
2304 | setsockopt(cs->udpFD, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); | |
05510903 RG |
2305 | #endif |
2306 | } | |
2307 | if (std::get<2>(dcLocal)) { | |
2308 | #ifdef SO_REUSEPORT | |
2309 | SSetsockopt(cs->udpFD, SOL_SOCKET, SO_REUSEPORT, 1); | |
2310 | #else | |
2311 | warnlog("SO_REUSEPORT has been configured on local address '%s' but is not supported", std::get<0>(dcLocal).toStringWithPort()); | |
11e1e08b RG |
2312 | #endif |
2313 | } | |
9f67b883 RG |
2314 | |
2315 | const std::string& itf = std::get<4>(dcLocal); | |
2316 | if (!itf.empty()) { | |
2317 | #ifdef SO_BINDTODEVICE | |
2318 | int res = setsockopt(cs->udpFD, SOL_SOCKET, SO_BINDTODEVICE, itf.c_str(), itf.length()); | |
2319 | if (res != 0) { | |
2320 | warnlog("Error setting up the interface on local address '%s': %s", std::get<0>(dcLocal).toStringWithPort(), strerror(errno)); | |
2321 | } | |
2322 | #else | |
2323 | warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", std::get<0>(dcLocal).toStringWithPort()); | |
2324 | #endif | |
2325 | } | |
2326 | ||
87b515ed RG |
2327 | #ifdef HAVE_EBPF |
2328 | if (g_defaultBPFFilter) { | |
8429ad04 | 2329 | cs->attachFilter(g_defaultBPFFilter); |
87b515ed RG |
2330 | vinfolog("Attaching default BPF Filter to UDP DNSCrypt frontend %s", cs->local.toStringWithPort()); |
2331 | } | |
2332 | #endif /* HAVE_EBPF */ | |
11e1e08b RG |
2333 | SBind(cs->udpFD, cs->local); |
2334 | toLaunch.push_back(cs); | |
2335 | g_frontends.push_back(cs); | |
41408d3a | 2336 | udpBindsCount++; |
11e1e08b RG |
2337 | |
2338 | cs = new ClientState; | |
4c34246d RG |
2339 | cs->local = std::get<0>(dcLocal); |
2340 | cs->dnscryptCtx = &(std::get<1>(dcLocal)); | |
11e1e08b RG |
2341 | cs->tcpFD = SSocket(cs->local.sin4.sin_family, SOCK_STREAM, 0); |
2342 | SSetsockopt(cs->tcpFD, SOL_SOCKET, SO_REUSEADDR, 1); | |
2343 | #ifdef TCP_DEFER_ACCEPT | |
2344 | SSetsockopt(cs->tcpFD, SOL_TCP,TCP_DEFER_ACCEPT, 1); | |
4c34246d | 2345 | #endif |
9e284f31 | 2346 | if (std::get<3>(dcLocal) > 0) { |
5f084036 | 2347 | #ifdef TCP_FASTOPEN |
1654ece3 | 2348 | SSetsockopt(cs->tcpFD, IPPROTO_TCP, TCP_FASTOPEN, std::get<3>(dcLocal)); |
5f084036 RG |
2349 | #else |
2350 | warnlog("TCP Fast Open has been configured on local address '%s' but is not supported", std::get<0>(dcLocal).toStringWithPort()); | |
9e284f31 | 2351 | #endif |
5f084036 | 2352 | } |
9f67b883 | 2353 | |
4c34246d | 2354 | #ifdef SO_REUSEPORT |
05510903 | 2355 | /* no need to warn again if configured but support is not available, we already did for UDP */ |
4c34246d RG |
2356 | if (std::get<2>(dcLocal)) { |
2357 | SSetsockopt(cs->tcpFD, SOL_SOCKET, SO_REUSEPORT, 1); | |
2358 | } | |
11e1e08b | 2359 | #endif |
9f67b883 RG |
2360 | |
2361 | if (!itf.empty()) { | |
2362 | #ifdef SO_BINDTODEVICE | |
2363 | int res = setsockopt(cs->tcpFD, SOL_SOCKET, SO_BINDTODEVICE, itf.c_str(), itf.length()); | |
2364 | if (res != 0) { | |
2365 | warnlog("Error setting up the interface on local address '%s': %s", std::get<0>(dcLocal).toStringWithPort(), strerror(errno)); | |
2366 | } | |
2367 | #else | |
2368 | warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", std::get<0>(dcLocal).toStringWithPort()); | |
2369 | #endif | |
2370 | } | |
2371 | ||
11e1e08b RG |
2372 | if(cs->local.sin4.sin_family == AF_INET6) { |
2373 | SSetsockopt(cs->tcpFD, IPPROTO_IPV6, IPV6_V6ONLY, 1); | |
2374 | } | |
87b515ed RG |
2375 | #ifdef HAVE_EBPF |
2376 | if (g_defaultBPFFilter) { | |
8429ad04 | 2377 | cs->attachFilter(g_defaultBPFFilter); |
87b515ed RG |
2378 | vinfolog("Attaching default BPF Filter to TCP DNSCrypt frontend %s", cs->local.toStringWithPort()); |
2379 | } | |
2380 | #endif /* HAVE_EBPF */ | |
f0e4dcba RG |
2381 | |
2382 | cs->cpus = std::get<5>(dcLocal); | |
2383 | ||
11e1e08b RG |
2384 | bindAny(cs->local.sin4.sin_family, cs->tcpFD); |
2385 | SBind(cs->tcpFD, cs->local); | |
2386 | SListen(cs->tcpFD, 64); | |
2387 | warnlog("Listening on %s", cs->local.toStringWithPort()); | |
2388 | toLaunch.push_back(cs); | |
2389 | g_frontends.push_back(cs); | |
41408d3a | 2390 | tcpBindsCount++; |
11e1e08b RG |
2391 | } |
2392 | #endif | |
2393 | ||
a227f47d RG |
2394 | for(auto& frontend : g_tlslocals) { |
2395 | ClientState* cs = new ClientState; | |
2396 | cs->local = frontend->d_addr; | |
2397 | cs->tcpFD = SSocket(cs->local.sin4.sin_family, SOCK_STREAM, 0); | |
2398 | SSetsockopt(cs->tcpFD, SOL_SOCKET, SO_REUSEADDR, 1); | |
2399 | #ifdef TCP_DEFER_ACCEPT | |
2400 | SSetsockopt(cs->tcpFD, SOL_TCP,TCP_DEFER_ACCEPT, 1); | |
2401 | #endif | |
2402 | if (frontend->d_tcpFastOpenQueueSize > 0) { | |
2403 | #ifdef TCP_FASTOPEN | |
2404 | SSetsockopt(cs->tcpFD, SOL_TCP, TCP_FASTOPEN, frontend->d_tcpFastOpenQueueSize); | |
2405 | #else | |
2406 | warnlog("TCP Fast Open has been configured on local address '%s' but is not supported", cs->local.toStringWithPort()); | |
2407 | #endif | |
2408 | } | |
2409 | if (frontend->d_reusePort) { | |
2410 | #ifdef SO_REUSEPORT | |
2411 | SSetsockopt(cs->tcpFD, SOL_SOCKET, SO_REUSEPORT, 1); | |
2412 | #else | |
2413 | warnlog("SO_REUSEPORT has been configured on local address '%s' but is not supported", cs.local.toStringWithPort()); | |
2414 | #endif | |
2415 | } | |
2416 | if(cs->local.sin4.sin_family == AF_INET6) { | |
2417 | SSetsockopt(cs->tcpFD, IPPROTO_IPV6, IPV6_V6ONLY, 1); | |
2418 | } | |
2419 | ||
2420 | if (!frontend->d_interface.empty()) { | |
2421 | #ifdef SO_BINDTODEVICE | |
2422 | int res = setsockopt(cs->tcpFD, SOL_SOCKET, SO_BINDTODEVICE, frontend->d_interface.c_str(), frontend->d_interface.length()); | |
2423 | if (res != 0) { | |
2424 | warnlog("Error setting up the interface on local address '%s': %s", cs->local.toStringWithPort(), strerror(errno)); | |
2425 | } | |
2426 | #else | |
2427 | warnlog("An interface has been configured on local address '%s' but SO_BINDTODEVICE is not supported", cs->local.toStringWithPort()); | |
2428 | #endif | |
2429 | } | |
2430 | ||
2431 | cs->cpus = frontend->d_cpus; | |
2432 | ||
2433 | bindAny(cs->local.sin4.sin_family, cs->tcpFD); | |
2434 | if (frontend->setupTLS()) { | |
2435 | cs->tlsFrontend = frontend; | |
2436 | SBind(cs->tcpFD, cs->local); | |
2437 | SListen(cs->tcpFD, 64); | |
2438 | warnlog("Listening on %s for TLS", cs->local.toStringWithPort()); | |
2439 | toLaunch.push_back(cs); | |
2440 | g_frontends.push_back(cs); | |
2441 | tcpBindsCount++; | |
2442 | } | |
2443 | else { | |
2444 | delete cs; | |
2445 | errlog("Error while setting up TLS on local address '%s', exiting", cs->local.toStringWithPort()); | |
2446 | _exit(EXIT_FAILURE); | |
2447 | } | |
2448 | } | |
2449 | ||
7cc68f53 | 2450 | if(g_cmdLine.beDaemon) { |
a4652d55 | 2451 | g_console=false; |
b076b34a | 2452 | daemonize(); |
c7b1d943 | 2453 | writepid(g_cmdLine.pidfile); |
a4652d55 | 2454 | } |
b076b34a | 2455 | else { |
64e4ebb4 | 2456 | vinfolog("Running in the foreground"); |
43ac546d | 2457 | warnlog("dnsdist %s comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it according to the terms of the GPL version 2", VERSION); |
3fa28388 RG |
2458 | vector<string> vec; |
2459 | std::string acls; | |
2460 | g_ACL.getCopy().toStringVector(&vec); | |
2461 | for(const auto& s : vec) { | |
2462 | if (!acls.empty()) | |
2463 | acls += ", "; | |
2464 | acls += s; | |
2465 | } | |
2466 | infolog("ACL allowing queries from: %s", acls.c_str()); | |
b076b34a | 2467 | } |
6d01c80c | 2468 | |
aac59883 RG |
2469 | uid_t newgid=0; |
2470 | gid_t newuid=0; | |
2471 | ||
2472 | if(!g_cmdLine.gid.empty()) | |
2473 | newgid = strToGID(g_cmdLine.gid.c_str()); | |
2474 | ||
2475 | if(!g_cmdLine.uid.empty()) | |
2476 | newuid = strToUID(g_cmdLine.uid.c_str()); | |
2477 | ||
2478 | dropGroupPrivs(newgid); | |
2479 | dropUserPrivs(newuid); | |
2480 | ||
a36ce055 RG |
2481 | /* this need to be done _after_ dropping privileges */ |
2482 | g_delay = new DelayPipe<DelayedPacket>(); | |
2483 | ||
9f4eb5cc RG |
2484 | if (g_snmpAgent) { |
2485 | g_snmpAgent->run(); | |
2486 | } | |
2487 | ||
edbda1ad | 2488 | g_tcpclientthreads = std::make_shared<TCPClientCollection>(g_maxTCPClientThreads, g_useTCPSinglePipe); |
a9bf3ec4 | 2489 | |
2e72cc0e | 2490 | for(auto& t : todo) |
2491 | t(); | |
2492 | ||
886e2cf2 | 2493 | auto localPools = g_pools.getCopy(); |
8f4f5186 RG |
2494 | /* create the default pool no matter what */ |
2495 | createPoolIfNotExists(localPools, ""); | |
7cc68f53 | 2496 | if(g_cmdLine.remotes.size()) { |
2497 | for(const auto& address : g_cmdLine.remotes) { | |
c9262563 | 2498 | auto ret=std::make_shared<DownstreamState>(ComboAddress(address, 53)); |
886e2cf2 | 2499 | addServerToPool(localPools, "", ret); |
7565f4e6 | 2500 | if (ret->connected) { |
2717a92f | 2501 | ret->tid = thread(responderThread, ret); |
7565f4e6 | 2502 | } |
ecbe9133 | 2503 | g_dstates.modify([ret](servers_t& servers) { servers.push_back(ret); }); |
6d01c80c | 2504 | } |
2505 | } | |
886e2cf2 | 2506 | g_pools.setState(localPools); |
6d01c80c | 2507 | |
e73ec7d3 | 2508 | if(g_dstates.getCopy().empty()) { |
2509 | errlog("No downstream servers defined: all packets will get dropped"); | |
2510 | // you might define them later, but you need to know | |
2511 | } | |
2512 | ||
41408d3a RG |
2513 | checkFileDescriptorsLimits(udpBindsCount, tcpBindsCount); |
2514 | ||
e5a14b2b | 2515 | for(auto& dss : g_dstates.getCopy()) { // it is a copy, but the internal shared_ptrs are the real deal |
773470ca | 2516 | if(dss->availability==DownstreamState::Availability::Auto) { |
fbe2a2e0 | 2517 | bool newState=upCheck(*dss); |
a7940c06 | 2518 | warnlog("Marking downstream %s as '%s'", dss->getNameWithAddr(), newState ? "up" : "down"); |
773470ca | 2519 | dss->upStatus = newState; |
2520 | } | |
2521 | } | |
b076b34a | 2522 | |
2e72cc0e | 2523 | for(auto& cs : toLaunch) { |
a36ce055 RG |
2524 | if (cs->udpFD >= 0) { |
2525 | thread t1(udpClientThread, cs); | |
f0e4dcba RG |
2526 | if (!cs->cpus.empty()) { |
2527 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2528 | } | |
a36ce055 | 2529 | t1.detach(); |
652a7355 | 2530 | } |
a36ce055 RG |
2531 | else if (cs->tcpFD >= 0) { |
2532 | thread t1(tcpAcceptorThread, cs); | |
f0e4dcba RG |
2533 | if (!cs->cpus.empty()) { |
2534 | mapThreadToCPUList(t1.native_handle(), cs->cpus); | |
2535 | } | |
a36ce055 | 2536 | t1.detach(); |
726ddf60 | 2537 | } |
24d5cb00 | 2538 | } |
7730131a | 2539 | |
42fae326 | 2540 | thread carbonthread(carbonDumpThread); |
2541 | carbonthread.detach(); | |
2542 | ||
3c115e0f | 2543 | thread stattid(maintThread); |
886e2cf2 | 2544 | stattid.detach(); |
6d01c80c | 2545 | |
886e2cf2 RG |
2546 | thread healththread(healthChecksThread); |
2547 | ||
505ca3d1 | 2548 | if(g_cmdLine.beDaemon || g_cmdLine.beSupervised) { |
6ab65223 PL |
2549 | #ifdef HAVE_SYSTEMD |
2550 | sd_notify(0, "READY=1"); | |
2551 | #endif | |
886e2cf2 | 2552 | healththread.join(); |
773470ca | 2553 | } |
6d01c80c | 2554 | else { |
886e2cf2 | 2555 | healththread.detach(); |
505ca3d1 | 2556 | doConsole(); |
3c115e0f | 2557 | } |
9cf811d1 | 2558 | _exit(EXIT_SUCCESS); |
3c115e0f | 2559 | |
6d01c80c | 2560 | } |
3f5c3f1d PD |
2561 | catch(const LuaContext::ExecutionErrorException& e) { |
2562 | try { | |
2563 | errlog("Fatal Lua error: %s", e.what()); | |
2564 | std::rethrow_if_nested(e); | |
2010ac95 RG |
2565 | } catch(const std::exception& ne) { |
2566 | errlog("Details: %s", ne.what()); | |
3f5c3f1d PD |
2567 | } |
2568 | catch(PDNSException &ae) | |
2569 | { | |
2570 | errlog("Fatal pdns error: %s", ae.reason); | |
2571 | } | |
2572 | _exit(EXIT_FAILURE); | |
2573 | } | |
24d5cb00 | 2574 | catch(std::exception &e) |
2575 | { | |
6d01c80c | 2576 | errlog("Fatal error: %s", e.what()); |
4a966472 | 2577 | _exit(EXIT_FAILURE); |
24d5cb00 | 2578 | } |
3f81d239 | 2579 | catch(PDNSException &ae) |
7730131a | 2580 | { |
6d01c80c | 2581 | errlog("Fatal pdns error: %s", ae.reason); |
4a966472 | 2582 | _exit(EXIT_FAILURE); |
7730131a | 2583 | } |