]>
Commit | Line | Data |
---|---|---|
12471842 PL |
1 | /* |
2 | * This file is part of PowerDNS or dnsdist. | |
3 | * Copyright -- PowerDNS.COM B.V. and its contributors | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of version 2 of the GNU General Public License as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * In addition, for the avoidance of any doubt, permission is granted to | |
10 | * link this program with OpenSSL and to (re)distribute the binaries | |
11 | * produced as the result of such linking. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
21 | */ | |
df111b53 | 22 | #pragma once |
11e1e08b | 23 | #include "config.h" |
df111b53 | 24 | #include "ext/luawrapper/include/LuaContext.hpp" |
cbf4e13a | 25 | |
df111b53 | 26 | #include <atomic> |
df111b53 | 27 | #include <mutex> |
cbf4e13a | 28 | #include <string> |
df111b53 | 29 | #include <thread> |
cbf4e13a | 30 | #include <time.h> |
bffca8b9 | 31 | #include <unistd.h> |
cbf4e13a RG |
32 | #include <unordered_map> |
33 | ||
cbf4e13a RG |
34 | #include <boost/variant.hpp> |
35 | ||
36 | #include "bpf-filter.hh" | |
f12666f2 | 37 | #include "capabilities.hh" |
9f6a31ff | 38 | #include "circular_buffer.hh" |
11e1e08b | 39 | #include "dnscrypt.hh" |
886e2cf2 | 40 | #include "dnsdist-cache.hh" |
87b515ed | 41 | #include "dnsdist-dynbpf.hh" |
cbf4e13a | 42 | #include "dnsname.hh" |
fbf14b03 | 43 | #include "doh.hh" |
cbf4e13a RG |
44 | #include "ednsoptions.hh" |
45 | #include "gettime.hh" | |
46 | #include "iputils.hh" | |
47 | #include "misc.hh" | |
48 | #include "mplexer.hh" | |
49 | #include "sholder.hh" | |
a227f47d | 50 | #include "tcpiohandler.hh" |
d61aa945 | 51 | #include "uuid-utils.hh" |
d8c19b98 | 52 | |
9b73b71c | 53 | void carbonDumpThread(); |
61d1b966 | 54 | uint64_t uptimeOfProcess(const std::string& str); |
bd1c631b | 55 | |
7b925432 RG |
56 | extern uint16_t g_ECSSourcePrefixV4; |
57 | extern uint16_t g_ECSSourcePrefixV6; | |
58 | extern bool g_ECSOverride; | |
26a6373d | 59 | |
15fac047 CH |
60 | typedef std::unordered_map<string, string> QTag; |
61 | ||
7b925432 RG |
62 | struct DNSQuestion |
63 | { | |
e7c732b8 | 64 | DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_): |
4ab01344 RG |
65 | qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) { |
66 | const uint16_t* flags = getFlagsFromDNSHeader(dh); | |
67 | origFlags = *flags; | |
68 | } | |
dd1a3034 RG |
69 | DNSQuestion(const DNSQuestion&) = delete; |
70 | DNSQuestion& operator=(const DNSQuestion&) = delete; | |
71 | DNSQuestion(DNSQuestion&&) = default; | |
7b925432 | 72 | |
0ed8f0fa RG |
73 | std::string getTrailingData() const; |
74 | bool setTrailingData(const std::string&); | |
75 | ||
7b925432 | 76 | #ifdef HAVE_PROTOBUF |
ec48a28d | 77 | boost::optional<boost::uuids::uuid> uniqueId; |
7b925432 | 78 | #endif |
bd14f087 | 79 | Netmask ecs; |
4ab01344 | 80 | boost::optional<Netmask> subnet; |
046bac5c | 81 | std::string sni; /* Server Name Indication, if any (DoT or DoH) */ |
2a28db86 | 82 | std::string poolname; |
4ab01344 RG |
83 | const DNSName* qname{nullptr}; |
84 | const ComboAddress* local{nullptr}; | |
85 | const ComboAddress* remote{nullptr}; | |
15fac047 | 86 | std::shared_ptr<QTag> qTag{nullptr}; |
cbf4e13a | 87 | std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions; |
4ab01344 RG |
88 | std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr}; |
89 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; | |
90 | struct dnsheader* dh{nullptr}; | |
91 | const struct timespec* queryTime{nullptr}; | |
fbf14b03 | 92 | struct DOHUnit* du{nullptr}; |
7b925432 | 93 | size_t size; |
e7c732b8 | 94 | unsigned int consumed{0}; |
4ab01344 RG |
95 | int delayMsec{0}; |
96 | boost::optional<uint32_t> tempFailureTTL; | |
97 | uint32_t cacheKeyNoECS; | |
98 | uint32_t cacheKey; | |
99 | const uint16_t qtype; | |
100 | const uint16_t qclass; | |
7b925432 RG |
101 | uint16_t len; |
102 | uint16_t ecsPrefixLength; | |
4ab01344 | 103 | uint16_t origFlags; |
1ecbd15e | 104 | uint8_t ednsRCode{0}; |
7b925432 RG |
105 | const bool tcp; |
106 | bool skipCache{false}; | |
107 | bool ecsOverride; | |
5b8255ba | 108 | bool useECS{true}; |
5cc8371b | 109 | bool addXPF{true}; |
bd14f087 | 110 | bool ecsSet{false}; |
4ab01344 RG |
111 | bool ecsAdded{false}; |
112 | bool ednsAdded{false}; | |
113 | bool useZeroScope{false}; | |
114 | bool dnssecOK{false}; | |
7b925432 RG |
115 | }; |
116 | ||
117 | struct DNSResponse : DNSQuestion | |
118 | { | |
dd026b9c RG |
119 | DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_): |
120 | DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { } | |
dd1a3034 RG |
121 | DNSResponse(const DNSResponse&) = delete; |
122 | DNSResponse& operator=(const DNSResponse&) = delete; | |
123 | DNSResponse(DNSResponse&&) = default; | |
7b925432 RG |
124 | }; |
125 | ||
5c30ec69 LM |
/* so what could you do:
   drop,
   fake up nxdomain,
   provide actual answer,
   allow and stop processing,
   continue processing,
   modify header: (servfail|refused|notimp), set TC=1,
   send to pool */
134 | ||
/* Abstract base class of the actions that can be applied to a query.
   Implementations provide operator() and toString(); getStats() is
   optional. */
class DNSAction
{
public:
  enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };

  /* Human-readable description of an Action value. None and NoOp share the
     same text; a value outside of the enumerators yields "Unknown". */
  static std::string typeToString(const Action& action)
  {
    switch(action) {
    case Action::Drop:
      return "Drop";
    case Action::Nxdomain:
      return "Send NXDomain";
    case Action::Refused:
      return "Send Refused";
    case Action::Spoof:
      return "Spoof an answer";
    case Action::SpoofRaw:
      return "Spoof an answer from raw bytes";
    case Action::Allow:
      return "Allow";
    case Action::HeaderModify:
      return "Modify the header";
    case Action::Pool:
      return "Route to a pool";
    case Action::Delay:
      return "Delay";
    case Action::Truncate:
      return "Truncate over UDP";
    case Action::ServFail:
      return "Send ServFail";
    case Action::None:
    case Action::NoOp:
      return "Do nothing";
    case Action::NoRecurse:
      return "Set rd=0";
    }

    return "Unknown";
  }

  /* Apply the action to the question and return the resulting Action
     value. 'ruleresult' is an out-parameter whose meaning is defined by
     the implementations. */
  virtual Action operator()(struct DNSQuestion*, std::string* ruleresult) const =0;
  virtual ~DNSAction()
  {
  }
  /* Textual description of this action instance. */
  virtual std::string toString() const = 0;
  /* Named statistics kept by the action; none by default.
     This must be 'return {}': the former 'return {{}}' selected the
     initializer_list constructor and produced a map holding one
     ("", 0.0) entry instead of an empty map. */
  virtual std::map<std::string, double> getStats() const
  {
    return {};
  }
};
184 | ||
185 | class DNSResponseAction | |
186 | { | |
187 | public: | |
5f23eb98 | 188 | enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None }; |
7b925432 | 189 | virtual Action operator()(DNSResponse*, string* ruleresult) const =0; |
205f2081 RG |
190 | virtual ~DNSResponseAction() |
191 | { | |
192 | } | |
7b925432 RG |
193 | virtual string toString() const = 0; |
194 | }; | |
195 | ||
78ffa782 | 196 | struct DynBlock |
197 | { | |
1d3ba133 | 198 | DynBlock(): action(DNSAction::Action::None), warning(false) |
5708a729 RG |
199 | { |
200 | } | |
201 | ||
1d3ba133 | 202 | DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false) |
5708a729 RG |
203 | { |
204 | } | |
205 | ||
1d3ba133 | 206 | DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning) |
5708a729 RG |
207 | { |
208 | blocks.store(rhs.blocks); | |
209 | } | |
210 | ||
78ffa782 | 211 | DynBlock& operator=(const DynBlock& rhs) |
212 | { | |
213 | reason=rhs.reason; | |
214 | until=rhs.until; | |
71c94675 | 215 | domain=rhs.domain; |
7b925432 | 216 | action=rhs.action; |
78ffa782 | 217 | blocks.store(rhs.blocks); |
1d3ba133 | 218 | warning=rhs.warning; |
78ffa782 | 219 | return *this; |
220 | } | |
71c94675 | 221 | |
78ffa782 | 222 | string reason; |
223 | struct timespec until; | |
71c94675 | 224 | DNSName domain; |
7b925432 | 225 | DNSAction::Action action; |
78ffa782 | 226 | mutable std::atomic<unsigned int> blocks; |
1d3ba133 | 227 | bool warning; |
78ffa782 | 228 | }; |
229 | ||
230 | extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG; | |
f758857a | 231 | |
232 | extern vector<pair<struct timeval, std::string> > g_confDelta; | |
233 | ||
eb0335ff MC |
234 | extern uint64_t getLatencyCount(const std::string&); |
235 | ||
e48090d1 | 236 | struct DNSDistStats |
237 | { | |
6ad8b29a | 238 | using stat_t=std::atomic<uint64_t>; // aww yiss ;-) |
e48090d1 | 239 | stat_t responses{0}; |
240 | stat_t servfailResponses{0}; | |
241 | stat_t queries{0}; | |
61d10a4d MH |
242 | stat_t frontendNXDomain{0}; |
243 | stat_t frontendServFail{0}; | |
244 | stat_t frontendNoError{0}; | |
e73ec7d3 | 245 | stat_t nonCompliantQueries{0}; |
d08b1cdf | 246 | stat_t nonCompliantResponses{0}; |
643a182a | 247 | stat_t rdQueries{0}; |
2efd427d | 248 | stat_t emptyQueries{0}; |
e48090d1 | 249 | stat_t aclDrops{0}; |
bd1c631b | 250 | stat_t dynBlocked{0}; |
e48090d1 | 251 | stat_t ruleDrop{0}; |
252 | stat_t ruleNXDomain{0}; | |
dd46e5e3 | 253 | stat_t ruleRefused{0}; |
5f23eb98 | 254 | stat_t ruleServFail{0}; |
e48090d1 | 255 | stat_t selfAnswered{0}; |
256 | stat_t downstreamTimeouts{0}; | |
257 | stat_t downstreamSendErrors{0}; | |
6ad8b29a | 258 | stat_t truncFail{0}; |
b8bc7e61 | 259 | stat_t noPolicy{0}; |
886e2cf2 RG |
260 | stat_t cacheHits{0}; |
261 | stat_t cacheMisses{0}; | |
eb0335ff | 262 | stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0}; |
f29758cc | 263 | stat_t securityStatus{0}; |
5c30ec69 | 264 | |
e16fd59c | 265 | double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0}; |
a1a787dc | 266 | typedef std::function<uint64_t(const std::string&)> statfunction_t; |
72f58a53 | 267 | typedef boost::variant<stat_t*, double*, statfunction_t> entry_t; |
e16fd59c | 268 | std::vector<std::pair<std::string, entry_t>> entries{ |
dd46e5e3 RG |
269 | {"responses", &responses}, |
270 | {"servfail-responses", &servfailResponses}, | |
271 | {"queries", &queries}, | |
61d10a4d MH |
272 | {"frontend-nxdomain", &frontendNXDomain}, |
273 | {"frontend-servfail", &frontendServFail}, | |
274 | {"frontend-noerror", &frontendNoError}, | |
dd46e5e3 | 275 | {"acl-drops", &aclDrops}, |
dd46e5e3 RG |
276 | {"rule-drop", &ruleDrop}, |
277 | {"rule-nxdomain", &ruleNXDomain}, | |
278 | {"rule-refused", &ruleRefused}, | |
5f23eb98 | 279 | {"rule-servfail", &ruleServFail}, |
dd46e5e3 RG |
280 | {"self-answered", &selfAnswered}, |
281 | {"downstream-timeouts", &downstreamTimeouts}, | |
5c30ec69 | 282 | {"downstream-send-errors", &downstreamSendErrors}, |
dd46e5e3 RG |
283 | {"trunc-failures", &truncFail}, |
284 | {"no-policy", &noPolicy}, | |
285 | {"latency0-1", &latency0_1}, | |
286 | {"latency1-10", &latency1_10}, | |
287 | {"latency10-50", &latency10_50}, | |
288 | {"latency50-100", &latency50_100}, | |
289 | {"latency100-1000", &latency100_1000}, | |
290 | {"latency-slow", &latencySlow}, | |
291 | {"latency-avg100", &latencyAvg100}, | |
292 | {"latency-avg1000", &latencyAvg1000}, | |
293 | {"latency-avg10000", &latencyAvg10000}, | |
294 | {"latency-avg1000000", &latencyAvg1000000}, | |
61d1b966 | 295 | {"uptime", uptimeOfProcess}, |
a9b6db56 | 296 | {"real-memory-usage", getRealMemoryUsage}, |
330dcb5c | 297 | {"special-memory-usage", getSpecialMemoryUsage}, |
0d394f35 RG |
298 | {"udp-in-errors", boost::bind(udpErrorStats, "udp-in-errors")}, |
299 | {"udp-noport-errors", boost::bind(udpErrorStats, "udp-noport-errors")}, | |
300 | {"udp-recvbuf-errors", boost::bind(udpErrorStats, "udp-recvbuf-errors")}, | |
301 | {"udp-sndbuf-errors", boost::bind(udpErrorStats, "udp-sndbuf-errors")}, | |
a2aa00ed | 302 | {"noncompliant-queries", &nonCompliantQueries}, |
d08b1cdf | 303 | {"noncompliant-responses", &nonCompliantResponses}, |
643a182a | 304 | {"rdqueries", &rdQueries}, |
2efd427d | 305 | {"empty-queries", &emptyQueries}, |
886e2cf2 RG |
306 | {"cache-hits", &cacheHits}, |
307 | {"cache-misses", &cacheMisses}, | |
0d394f35 RG |
308 | {"cpu-iowait", getCPUIOWait}, |
309 | {"cpu-steal", getCPUSteal}, | |
4f99f3d3 | 310 | {"cpu-sys-msec", getCPUTimeSystem}, |
0d394f35 | 311 | {"cpu-user-msec", getCPUTimeUser}, |
dd46e5e3 | 312 | {"fd-usage", getOpenFileDescriptors}, |
5c30ec69 | 313 | {"dyn-blocked", &dynBlocked}, |
f29758cc | 314 | {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }}, |
eb0335ff MC |
315 | {"security-status", &securityStatus}, |
316 | // Latency histogram | |
317 | {"latency-sum", &latencySum}, | |
318 | {"latency-count", getLatencyCount}, | |
42fae326 | 319 | }; |
e48090d1 | 320 | }; |
321 | ||
322 | extern struct DNSDistStats g_stats; | |
f653b8df | 323 | void doLatencyStats(double udiff); |
e48090d1 | 324 | |
638184e9 | 325 | |
df111b53 | 326 | struct StopWatch |
327 | { | |
58307a85 RG |
328 | StopWatch(bool realTime=false): d_needRealTime(realTime) |
329 | { | |
330 | } | |
df111b53 | 331 | struct timespec d_start{0,0}; |
58307a85 RG |
332 | bool d_needRealTime{false}; |
333 | ||
5c30ec69 | 334 | void start() { |
58307a85 | 335 | if(gettime(&d_start, d_needRealTime) < 0) |
df111b53 | 336 | unixDie("Getting timestamp"); |
5c30ec69 | 337 | |
df111b53 | 338 | } |
cf48b0ce RG |
339 | |
340 | void set(const struct timespec& from) { | |
341 | d_start = from; | |
342 | } | |
5c30ec69 | 343 | |
df111b53 | 344 | double udiff() const { |
345 | struct timespec now; | |
58307a85 | 346 | if(gettime(&now, d_needRealTime) < 0) |
df111b53 | 347 | unixDie("Getting timestamp"); |
5c30ec69 | 348 | |
df111b53 | 349 | return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0; |
350 | } | |
351 | ||
352 | double udiffAndSet() { | |
353 | struct timespec now; | |
58307a85 | 354 | if(gettime(&now, d_needRealTime) < 0) |
df111b53 | 355 | unixDie("Getting timestamp"); |
5c30ec69 | 356 | |
df111b53 | 357 | auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0; |
358 | d_start = now; | |
359 | return ret; | |
360 | } | |
361 | ||
362 | }; | |
363 | ||
67ce0bdd | 364 | class BasicQPSLimiter |
df111b53 | 365 | { |
366 | public: | |
67ce0bdd | 367 | BasicQPSLimiter() |
df111b53 | 368 | { |
369 | } | |
370 | ||
2d29e6b7 | 371 | BasicQPSLimiter(unsigned int burst): d_tokens(burst) |
67ce0bdd RG |
372 | { |
373 | d_prev.start(); | |
374 | } | |
375 | ||
376 | bool check(unsigned int rate, unsigned int burst) const // this is not quite fair | |
377 | { | |
378 | auto delta = d_prev.udiffAndSet(); | |
379 | ||
1a1787b6 | 380 | if(delta > 0.0) // time, frequently, does go backwards.. |
381 | d_tokens += 1.0 * rate * (delta/1000000.0); | |
67ce0bdd RG |
382 | |
383 | if(d_tokens > burst) { | |
384 | d_tokens = burst; | |
385 | } | |
386 | ||
387 | bool ret=false; | |
388 | if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise | |
389 | ret=true; | |
390 | --d_tokens; | |
391 | } | |
392 | ||
393 | return ret; | |
394 | } | |
395 | ||
396 | bool seenSince(const struct timespec& cutOff) const | |
397 | { | |
398 | return cutOff < d_prev.d_start; | |
399 | } | |
400 | ||
401 | protected: | |
402 | mutable StopWatch d_prev; | |
403 | mutable double d_tokens; | |
404 | }; | |
405 | ||
406 | class QPSLimiter : public BasicQPSLimiter | |
407 | { | |
408 | public: | |
409 | QPSLimiter(): BasicQPSLimiter() | |
410 | { | |
411 | } | |
412 | ||
2d29e6b7 | 413 | QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false) |
df111b53 | 414 | { |
df111b53 | 415 | d_prev.start(); |
416 | } | |
417 | ||
418 | unsigned int getRate() const | |
419 | { | |
67ce0bdd | 420 | return d_passthrough ? 0 : d_rate; |
df111b53 | 421 | } |
422 | ||
423 | int getPassed() const | |
424 | { | |
425 | return d_passed; | |
426 | } | |
67ce0bdd | 427 | |
df111b53 | 428 | int getBlocked() const |
429 | { | |
430 | return d_blocked; | |
431 | } | |
432 | ||
ecbe9133 | 433 | bool check() const // this is not quite fair |
df111b53 | 434 | { |
67ce0bdd | 435 | if (d_passthrough) { |
df111b53 | 436 | return true; |
67ce0bdd | 437 | } |
df111b53 | 438 | |
67ce0bdd RG |
439 | bool ret = BasicQPSLimiter::check(d_rate, d_burst); |
440 | if (ret) { | |
df111b53 | 441 | d_passed++; |
442 | } | |
67ce0bdd | 443 | else { |
df111b53 | 444 | d_blocked++; |
67ce0bdd | 445 | } |
df111b53 | 446 | |
5c30ec69 | 447 | return ret; |
df111b53 | 448 | } |
449 | private: | |
ecbe9133 | 450 | mutable unsigned int d_passed{0}; |
451 | mutable unsigned int d_blocked{0}; | |
67ce0bdd RG |
452 | unsigned int d_rate; |
453 | unsigned int d_burst; | |
454 | bool d_passthrough{true}; | |
df111b53 | 455 | }; |
456 | ||
b5b93e0b RG |
457 | struct ClientState; |
458 | ||
df111b53 | 459 | struct IDState |
460 | { | |
a9489723 | 461 | IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;} |
71b86bd8 | 462 | IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age) |
df111b53 | 463 | { |
a9489723 RG |
464 | usageIndicator.store(orig.usageIndicator.load()); |
465 | origFD = orig.origFD; | |
df111b53 | 466 | origID = orig.origID; |
7b3865cd | 467 | delayMsec = orig.delayMsec; |
acb8f5d5 | 468 | tempFailureTTL = orig.tempFailureTTL; |
df111b53 | 469 | } |
470 | ||
311f19d5 | 471 | static const int64_t unusedIndicator = -1; |
2bf26975 | 472 | |
311f19d5 RG |
473 | static bool isInUse(int64_t usageIndicator) |
474 | { | |
475 | return usageIndicator != unusedIndicator; | |
476 | } | |
477 | ||
478 | bool isInUse() const | |
479 | { | |
480 | return usageIndicator != unusedIndicator; | |
481 | } | |
482 | ||
483 | /* return true if the value has been successfully replaced meaning that | |
484 | no-one updated the usage indicator in the meantime */ | |
485 | bool tryMarkUnused(int64_t expectedUsageIndicator) | |
486 | { | |
487 | return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator); | |
488 | } | |
489 | ||
490 | /* mark as unused no matter what, return true if the state was in use before */ | |
491 | bool markAsUsed() | |
492 | { | |
493 | auto currentGeneration = generation++; | |
494 | return markAsUsed(currentGeneration); | |
495 | } | |
496 | ||
497 | /* mark as unused no matter what, return true if the state was in use before */ | |
498 | bool markAsUsed(int64_t currentGeneration) | |
499 | { | |
500 | int64_t oldUsage = usageIndicator.exchange(currentGeneration); | |
501 | return oldUsage != unusedIndicator; | |
502 | } | |
503 | ||
a9489723 | 504 | /* We use this value to detect whether this state is in use. |
9bd1a882 RG |
505 | For performance reasons we don't want to use a lock here, but that means |
506 | we need to be very careful when modifying this value. Modifications happen | |
507 | from: | |
508 | - one of the UDP or DoH 'client' threads receiving a query, selecting a backend | |
509 | then picking one of the states associated to this backend (via the idOffset). | |
a9489723 | 510 | Most of the time this state should not be in use and usageIndicator is -1, but we |
9bd1a882 RG |
511 | might not yet have received a response for the query previously associated to this |
512 | state, meaning that we will 'reuse' this state and erase the existing state. | |
513 | If we ever receive a response for this state, it will be discarded. This is | |
514 | mostly fine for UDP except that we still need to be careful in order to miss | |
515 | the 'outstanding' counters, which should only be increased when we are picking | |
516 | an empty state, and not when reusing ; | |
517 | For DoH, though, we have dynamically allocated a DOHUnit object that needs to | |
518 | be freed, as well as internal objects internals to libh2o. | |
519 | - one of the UDP receiver threads receiving a response from a backend, picking | |
520 | the corresponding state and sending the response to the client ; | |
521 | - the 'healthcheck' thread scanning the states to actively discover timeouts, | |
522 | mostly to keep some counters like the 'outstanding' one sane. | |
a9489723 RG |
523 | We previously based that logic on the origFD (FD on which the query was received, |
524 | and therefore from where the response should be sent) but this suffered from an | |
525 | ABA problem since it was quite likely that a UDP 'client thread' would reset it to the | |
526 | same value since we only have so much incoming sockets: | |
527 | - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ; | |
528 | - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname, | |
529 | qtype and qclass match | |
530 | - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ; | |
531 | - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still | |
532 | 5, except it's not the same 5 anymore and it overrides a fresh state. | |
533 | We now use a 32-bit unsigned counter instead, which is incremented every time the state is set, | |
534 | wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1 | |
535 | when the state is unused and the value of our counter otherwise. | |
9bd1a882 | 536 | */ |
311f19d5 RG |
537 | std::atomic<int64_t> usageIndicator{unusedIndicator}; // set to unusedIndicator to indicate this state is empty // 8 |
538 | std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue // 4 | |
2bf26975 | 539 | ComboAddress origRemote; // 28 |
549d63c9 | 540 | ComboAddress origDest; // 28 |
2bf26975 | 541 | StopWatch sentTime; // 16 |
542 | DNSName qname; // 80 | |
43234e76 | 543 | std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr}; |
d8c19b98 | 544 | #ifdef HAVE_PROTOBUF |
ec48a28d | 545 | boost::optional<boost::uuids::uuid> uniqueId; |
11e1e08b | 546 | #endif |
78e3ac9e | 547 | boost::optional<Netmask> subnet{boost::none}; |
886e2cf2 | 548 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; |
a76b0d63 | 549 | std::shared_ptr<QTag> qTag{nullptr}; |
b5b93e0b | 550 | const ClientState* cs{nullptr}; |
fbf14b03 | 551 | DOHUnit* du{nullptr}; |
9837850d | 552 | uint32_t cacheKey; // 4 |
553 | uint32_t cacheKeyNoECS; // 4 | |
71b86bd8 | 554 | uint16_t age; // 4 |
2bf26975 | 555 | uint16_t qtype; // 2 |
886e2cf2 | 556 | uint16_t qclass; // 2 |
2bf26975 | 557 | uint16_t origID; // 2 |
aeb36780 | 558 | uint16_t origFlags; // 2 |
a9489723 | 559 | int origFD{-1}; |
7b3865cd | 560 | int delayMsec; |
acb8f5d5 | 561 | boost::optional<uint32_t> tempFailureTTL; |
ca404e94 | 562 | bool ednsAdded{false}; |
ff73f02b | 563 | bool ecsAdded{false}; |
886e2cf2 | 564 | bool skipCache{false}; |
7cea4e39 | 565 | bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr |
d7728daf | 566 | bool dnssecOK{false}; |
389d903a | 567 | bool useZeroScope; |
df111b53 | 568 | }; |
569 | ||
786e4d8c | 570 | typedef std::unordered_map<string, unsigned int> QueryCountRecords; |
dd1a3034 | 571 | typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter; |
786e4d8c RS |
572 | struct QueryCount { |
573 | QueryCount() | |
574 | { | |
43234e76 | 575 | pthread_rwlock_init(&queryLock, nullptr); |
786e4d8c | 576 | } |
040793d4 OM |
577 | ~QueryCount() |
578 | { | |
579 | pthread_rwlock_destroy(&queryLock); | |
580 | } | |
786e4d8c RS |
581 | QueryCountRecords records; |
582 | QueryCountFilter filter; | |
583 | pthread_rwlock_t queryLock; | |
584 | bool enabled{false}; | |
585 | }; | |
586 | ||
587 | extern QueryCount g_qcount; | |
588 | ||
8a5d5053 | 589 | struct ClientState |
590 | { | |
8274967b | 591 | ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort) |
6e9fd124 RG |
592 | { |
593 | } | |
594 | ||
f0e4dcba | 595 | std::set<int> cpus; |
8a5d5053 | 596 | ComboAddress local; |
43234e76 | 597 | std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr}; |
6e9fd124 | 598 | std::shared_ptr<TLSFrontend> tlsFrontend{nullptr}; |
fbf14b03 | 599 | std::shared_ptr<DOHFrontend> dohFrontend{nullptr}; |
6e9fd124 | 600 | std::string interface; |
963bef8d | 601 | std::atomic<uint64_t> queries{0}; |
7fc95193 | 602 | mutable std::atomic<uint64_t> responses{0}; |
a6e9e107 RG |
603 | std::atomic<uint64_t> tcpDiedReadingQuery{0}; |
604 | std::atomic<uint64_t> tcpDiedSendingResponse{0}; | |
605 | std::atomic<uint64_t> tcpGaveUp{0}; | |
606 | std::atomic<uint64_t> tcpClientTimeouts{0}; | |
607 | std::atomic<uint64_t> tcpDownstreamTimeouts{0}; | |
cff9aa03 | 608 | std::atomic<uint64_t> tcpCurrentConnections{0}; |
846b63bb RG |
609 | std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption |
610 | std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket | |
b608e6c6 RG |
611 | std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired) |
612 | std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one | |
bb3954f0 RG |
613 | std::atomic<uint64_t> tls10queries{0}; // valid DNS queries received via TLSv1.0 |
614 | std::atomic<uint64_t> tls11queries{0}; // valid DNS queries received via TLSv1.1 | |
615 | std::atomic<uint64_t> tls12queries{0}; // valid DNS queries received via TLSv1.2 | |
616 | std::atomic<uint64_t> tls13queries{0}; // valid DNS queries received via TLSv1.3 | |
617 | std::atomic<uint64_t> tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version | |
cff9aa03 RG |
618 | std::atomic<double> tcpAvgQueriesPerConnection{0.0}; |
619 | /* in ms */ | |
620 | std::atomic<double> tcpAvgConnectionDuration{0.0}; | |
a36ce055 RG |
621 | int udpFD{-1}; |
622 | int tcpFD{-1}; | |
6e9fd124 | 623 | int fastOpenQueueSize{0}; |
b5b93e0b | 624 | bool muted{false}; |
6e9fd124 RG |
625 | bool tcp; |
626 | bool reuseport; | |
627 | bool ready{false}; | |
8429ad04 RG |
628 | |
629 | int getSocket() const | |
630 | { | |
631 | return udpFD != -1 ? udpFD : tcpFD; | |
632 | } | |
633 | ||
3a2ca389 RG |
634 | bool isUDP() const |
635 | { | |
636 | return udpFD != -1; | |
637 | } | |
638 | ||
639 | bool isTCP() const | |
640 | { | |
641 | return udpFD == -1; | |
642 | } | |
643 | ||
f34fdcc5 RG |
644 | bool hasTLS() const |
645 | { | |
646 | return tlsFrontend != nullptr || dohFrontend != nullptr; | |
647 | } | |
648 | ||
ba7ec340 RG |
649 | std::string getType() const |
650 | { | |
651 | std::string result = udpFD != -1 ? "UDP" : "TCP"; | |
652 | ||
fbf14b03 RG |
653 | if (dohFrontend) { |
654 | result += " (DNS over HTTPS)"; | |
655 | } | |
656 | else if (tlsFrontend) { | |
ba7ec340 RG |
657 | result += " (DNS over TLS)"; |
658 | } | |
659 | else if (dnscryptCtx) { | |
660 | result += " (DNSCrypt)"; | |
661 | } | |
662 | ||
663 | return result; | |
664 | } | |
665 | ||
8429ad04 RG |
666 | #ifdef HAVE_EBPF |
667 | shared_ptr<BPFFilter> d_filter; | |
668 | ||
669 | void detachFilter() | |
670 | { | |
671 | if (d_filter) { | |
672 | d_filter->removeSocket(getSocket()); | |
673 | d_filter = nullptr; | |
674 | } | |
675 | } | |
676 | ||
677 | void attachFilter(shared_ptr<BPFFilter> bpf) | |
678 | { | |
679 | detachFilter(); | |
680 | ||
681 | bpf->addSocket(getSocket()); | |
682 | d_filter = bpf; | |
683 | } | |
684 | #endif /* HAVE_EBPF */ | |
cff9aa03 | 685 | |
dd026b9c | 686 | void updateTCPMetrics(size_t nbQueries, uint64_t durationMs) |
cff9aa03 | 687 | { |
dd026b9c | 688 | tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0); |
cff9aa03 RG |
689 | tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); |
690 | } | |
8a5d5053 | 691 | }; |
692 | ||
693 | class TCPClientCollection { | |
694 | std::vector<int> d_tcpclientthreads; | |
ded1985a | 695 | std::atomic<uint64_t> d_numthreads{0}; |
a9bf3ec4 | 696 | std::atomic<uint64_t> d_pos{0}; |
ded1985a | 697 | std::atomic<uint64_t> d_queued{0}; |
73402775 | 698 | const uint64_t d_maxthreads{0}; |
ded1985a | 699 | std::mutex d_mutex; |
edbda1ad | 700 | int d_singlePipe[2]; |
73402775 | 701 | const bool d_useSinglePipe; |
ded1985a | 702 | public: |
8a5d5053 | 703 | |
b79e4996 RG |
704 | TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe) |
705 | ||
8a5d5053 | 706 | { |
a9bf3ec4 | 707 | d_tcpclientthreads.reserve(maxThreads); |
edbda1ad RG |
708 | |
709 | if (d_useSinglePipe) { | |
710 | if (pipe(d_singlePipe) < 0) { | |
c52b8cb6 OM |
711 | int err = errno; |
712 | throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err)); | |
edbda1ad | 713 | } |
3b07fd1b RG |
714 | |
715 | if (!setNonBlocking(d_singlePipe[0])) { | |
716 | int err = errno; | |
717 | close(d_singlePipe[0]); | |
718 | close(d_singlePipe[1]); | |
c52b8cb6 | 719 | throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err)); |
3b07fd1b RG |
720 | } |
721 | ||
edbda1ad RG |
722 | if (!setNonBlocking(d_singlePipe[1])) { |
723 | int err = errno; | |
724 | close(d_singlePipe[0]); | |
725 | close(d_singlePipe[1]); | |
c52b8cb6 | 726 | throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err)); |
edbda1ad RG |
727 | } |
728 | } | |
8a5d5053 | 729 | } |
a9bf3ec4 | 730 | int getThread() |
8a5d5053 | 731 | { |
6c1ca990 | 732 | uint64_t pos = d_pos++; |
8a5d5053 | 733 | ++d_queued; |
734 | return d_tcpclientthreads[pos % d_numthreads]; | |
735 | } | |
ded1985a RG |
736 | bool hasReachedMaxThreads() const |
737 | { | |
738 | return d_numthreads >= d_maxthreads; | |
739 | } | |
740 | uint64_t getThreadsCount() const | |
741 | { | |
742 | return d_numthreads; | |
743 | } | |
744 | uint64_t getQueuedCount() const | |
745 | { | |
746 | return d_queued; | |
747 | } | |
748 | void decrementQueuedCount() | |
749 | { | |
750 | --d_queued; | |
751 | } | |
8a5d5053 | 752 | void addTCPClientThread(); |
753 | }; | |
754 | ||
1f7646c2 | 755 | extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads; |
8a5d5053 | 756 | |
df111b53 | 757 | struct DownstreamState |
758 | { | |
1720247e | 759 | typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t; |
98650fde | 760 | |
203b5348 RG |
761 | DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect); |
762 | DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {} | |
6a62c0e3 RG |
763 | ~DownstreamState() |
764 | { | |
5bdbb83d | 765 | for (auto& fd : sockets) { |
150105a2 RG |
766 | if (fd >= 0) { |
767 | close(fd); | |
768 | fd = -1; | |
769 | } | |
770 | } | |
040793d4 | 771 | pthread_rwlock_destroy(&d_lock); |
6a62c0e3 | 772 | } |
1720247e CHB |
773 | boost::uuids::uuid id; |
774 | std::set<unsigned int> hashes; | |
d58e616a | 775 | mutable pthread_rwlock_t d_lock; |
5bdbb83d | 776 | std::vector<int> sockets; |
70b0d0e2 | 777 | const std::string sourceItfName; |
5bdbb83d | 778 | std::mutex socketsLock; |
5d7e6765 | 779 | std::mutex connectLock; |
5bdbb83d | 780 | std::unique_ptr<FDMultiplexer> mplexer{nullptr}; |
df111b53 | 781 | std::thread tid; |
a2353842 | 782 | const ComboAddress remote; |
df111b53 | 783 | QPSLimiter qps; |
784 | vector<IDState> idStates; | |
73402775 | 785 | const ComboAddress sourceAddr; |
98650fde | 786 | checkfunc_t checkFunction; |
fbe2a2e0 RG |
787 | DNSName checkName{"a.root-servers.net."}; |
788 | QType checkType{QType::A}; | |
de9f7157 | 789 | uint16_t checkClass{QClass::IN}; |
df111b53 | 790 | std::atomic<uint64_t> idOffset{0}; |
791 | std::atomic<uint64_t> sendErrors{0}; | |
792 | std::atomic<uint64_t> outstanding{0}; | |
793 | std::atomic<uint64_t> reuseds{0}; | |
794 | std::atomic<uint64_t> queries{0}; | |
7fc95193 | 795 | std::atomic<uint64_t> responses{0}; |
df111b53 | 796 | struct { |
797 | std::atomic<uint64_t> sendErrors{0}; | |
798 | std::atomic<uint64_t> reuseds{0}; | |
799 | std::atomic<uint64_t> queries{0}; | |
800 | } prev; | |
a6e9e107 RG |
801 | std::atomic<uint64_t> tcpDiedSendingQuery{0}; |
802 | std::atomic<uint64_t> tcpDiedReadingResponse{0}; | |
803 | std::atomic<uint64_t> tcpGaveUp{0}; | |
804 | std::atomic<uint64_t> tcpReadTimeouts{0}; | |
805 | std::atomic<uint64_t> tcpWriteTimeouts{0}; | |
cff9aa03 RG |
806 | std::atomic<uint64_t> tcpCurrentConnections{0}; |
807 | std::atomic<double> tcpAvgQueriesPerConnection{0.0}; | |
808 | /* in ms */ | |
809 | std::atomic<double> tcpAvgConnectionDuration{0.0}; | |
5bdbb83d | 810 | size_t socketsOffset{0}; |
df111b53 | 811 | double queryLoad{0.0}; |
812 | double dropRate{0.0}; | |
813 | double latencyUsec{0.0}; | |
814 | int order{1}; | |
815 | int weight{1}; | |
b40cffe7 | 816 | int tcpConnectTimeout{5}; |
3f6d07a4 RG |
817 | int tcpRecvTimeout{30}; |
818 | int tcpSendTimeout{30}; | |
7c9bf18d | 819 | unsigned int checkInterval{1}; |
820 | unsigned int lastCheck{0}; | |
73402775 | 821 | const unsigned int sourceItf{0}; |
3f6d07a4 | 822 | uint16_t retries{5}; |
c85f69a8 | 823 | uint16_t xpfRRCode{0}; |
b7e6f4a1 | 824 | uint16_t checkTimeout{1000}; /* in milliseconds */ |
9e87dcb8 | 825 | uint8_t currentCheckFailures{0}; |
853faf61 | 826 | uint8_t consecutiveSuccessfulChecks{0}; |
9e87dcb8 | 827 | uint8_t maxCheckFailures{1}; |
1b633bec | 828 | uint8_t minRiseSuccesses{1}; |
df111b53 | 829 | StopWatch sw; |
830 | set<string> pools; | |
831 | enum class Availability { Up, Down, Auto} availability{Availability::Auto}; | |
fbe2a2e0 | 832 | bool mustResolve{false}; |
df111b53 | 833 | bool upStatus{false}; |
ca404e94 | 834 | bool useECS{false}; |
21830638 | 835 | bool setCD{false}; |
49c33a6c | 836 | bool disableZeroScope{false}; |
7565f4e6 | 837 | std::atomic<bool> connected{false}; |
5d7e6765 | 838 | std::atomic_flag threadStarted; |
284d460c | 839 | bool tcpFastOpen{false}; |
5602f131 | 840 | bool ipBindAddrNoPort{true}; |
5d7e6765 | 841 | |
df111b53 | 842 | bool isUp() const |
843 | { | |
844 | if(availability == Availability::Down) | |
845 | return false; | |
846 | if(availability == Availability::Up) | |
847 | return true; | |
848 | return upStatus; | |
849 | } | |
850 | void setUp() { availability = Availability::Up; } | |
851 | void setDown() { availability = Availability::Down; } | |
852 | void setAuto() { availability = Availability::Auto; } | |
18eeccc9 | 853 | string getName() const { |
18eeccc9 RG |
854 | return name; |
855 | } | |
a7940c06 | 856 | string getNameWithAddr() const { |
be05aa91 RG |
857 | return nameWithAddr; |
858 | } | |
859 | void setName(const std::string& newName) | |
860 | { | |
861 | name = newName; | |
862 | if (newName.empty()) { | |
863 | nameWithAddr = newName.empty() ? remote.toStringWithPort() : (name + " (" + remote.toStringWithPort()+ ")"); | |
a7940c06 | 864 | } |
a7940c06 | 865 | } |
be05aa91 | 866 | |
9f4eb5cc RG |
867 | string getStatus() const |
868 | { | |
869 | string status; | |
870 | if(availability == DownstreamState::Availability::Up) | |
871 | status = "UP"; | |
872 | else if(availability == DownstreamState::Availability::Down) | |
873 | status = "DOWN"; | |
874 | else | |
875 | status = (upStatus ? "up" : "down"); | |
876 | return status; | |
877 | } | |
5d7e6765 | 878 | bool reconnect(); |
f2caf657 CHB |
879 | void hash(); |
880 | void setId(const boost::uuids::uuid& newId); | |
881 | void setWeight(int newWeight); | |
cff9aa03 | 882 | |
dd026b9c | 883 | void updateTCPMetrics(size_t nbQueries, uint64_t durationMs) |
cff9aa03 | 884 | { |
dd026b9c | 885 | tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0); |
cff9aa03 RG |
886 | tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); |
887 | } | |
be05aa91 RG |
888 | private: |
889 | std::string name; | |
890 | std::string nameWithAddr; | |
df111b53 | 891 | }; |
892 | using servers_t =vector<std::shared_ptr<DownstreamState>>; | |
df111b53 | 893 | |
9b73b71c | 894 | void responderThread(std::shared_ptr<DownstreamState> state); |
da4e7813 | 895 | extern std::mutex g_luamutex; |
896 | extern LuaContext g_lua; | |
897 | extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex | |
898 | ||
0940e4eb | 899 | class DNSRule |
900 | { | |
901 | public: | |
205f2081 RG |
902 | virtual ~DNSRule () |
903 | { | |
904 | } | |
497a6e3a | 905 | virtual bool matches(const DNSQuestion* dq) const =0; |
0940e4eb | 906 | virtual string toString() const = 0; |
907 | mutable std::atomic<uint64_t> d_matches{0}; | |
908 | }; | |
909 | ||
be05aa91 RG |
910 | struct dnsdist_ffi_servers_list_t; |
911 | struct dnsdist_ffi_server_t; | |
912 | struct dnsdist_ffi_dnsquestion_t; | |
df111b53 | 913 | |
914 | struct ServerPolicy | |
915 | { | |
be05aa91 RG |
916 | template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >; |
917 | using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>; | |
918 | typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t; | |
919 | typedef std::function<unsigned int(dnsdist_ffi_servers_list_t* servers, dnsdist_ffi_dnsquestion_t* dq)> ffipolicyfunc_t; | |
920 | ||
921 | ServerPolicy(const std::string& name_, policyfunc_t policy_, bool isLua_): name(name_), policy(policy_), isLua(isLua_) | |
922 | { | |
923 | } | |
924 | ServerPolicy(const std::string& name_, ffipolicyfunc_t policy_): name(name_), ffipolicy(policy_), isLua(true), isFFI(true) | |
925 | { | |
926 | } | |
927 | ServerPolicy() | |
928 | { | |
929 | } | |
930 | ||
df111b53 | 931 | string name; |
70a57b05 | 932 | policyfunc_t policy; |
be05aa91 RG |
933 | ffipolicyfunc_t ffipolicy; |
934 | bool isLua{false}; | |
935 | bool isFFI{false}; | |
936 | ||
a4fd2d2f CH |
937 | std::string toString() const { |
938 | return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\""; | |
939 | } | |
df111b53 | 940 | }; |
941 | ||
886e2cf2 RG |
942 | struct ServerPool |
943 | { | |
a1b1a29d RG |
944 | ServerPool() |
945 | { | |
946 | pthread_rwlock_init(&d_lock, nullptr); | |
947 | } | |
040793d4 OM |
948 | ~ServerPool() |
949 | { | |
950 | pthread_rwlock_destroy(&d_lock); | |
951 | } | |
a1b1a29d | 952 | |
886e2cf2 RG |
953 | const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; }; |
954 | ||
7e687744 RG |
955 | bool getECS() const |
956 | { | |
957 | return d_useECS; | |
958 | } | |
959 | ||
960 | void setECS(bool useECS) | |
961 | { | |
962 | d_useECS = useECS; | |
963 | } | |
964 | ||
886e2cf2 | 965 | std::shared_ptr<DNSDistPacketCache> packetCache{nullptr}; |
b9f8a6c8 | 966 | std::shared_ptr<ServerPolicy> policy{nullptr}; |
5c30ec69 | 967 | |
a1b1a29d RG |
968 | size_t countServers(bool upOnly) |
969 | { | |
970 | size_t count = 0; | |
971 | ReadLock rl(&d_lock); | |
972 | for (const auto& server : d_servers) { | |
973 | if (!upOnly || std::get<1>(server)->isUp() ) { | |
974 | count++; | |
c1b81381 RG |
975 | } |
976 | } | |
a1b1a29d RG |
977 | return count; |
978 | } | |
979 | ||
be05aa91 | 980 | ServerPolicy::NumberedServerVector getServers() |
a1b1a29d | 981 | { |
be05aa91 | 982 | ServerPolicy::NumberedServerVector result; |
a1b1a29d RG |
983 | { |
984 | ReadLock rl(&d_lock); | |
985 | result = d_servers; | |
986 | } | |
987 | return result; | |
988 | } | |
989 | ||
990 | void addServer(shared_ptr<DownstreamState>& server) | |
991 | { | |
992 | WriteLock wl(&d_lock); | |
993 | unsigned int count = (unsigned int) d_servers.size(); | |
994 | d_servers.push_back(make_pair(++count, server)); | |
995 | /* we need to reorder based on the server 'order' */ | |
996 | std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) { | |
997 | return a.second->order < b.second->order; | |
998 | }); | |
999 | /* and now we need to renumber for Lua (custom policies) */ | |
1000 | size_t idx = 1; | |
1001 | for (auto& serv : d_servers) { | |
1002 | serv.first = idx++; | |
1003 | } | |
1004 | } | |
1005 | ||
1006 | void removeServer(shared_ptr<DownstreamState>& server) | |
1007 | { | |
1008 | WriteLock wl(&d_lock); | |
1009 | size_t idx = 1; | |
1010 | bool found = false; | |
1011 | for (auto it = d_servers.begin(); it != d_servers.end();) { | |
1012 | if (found) { | |
1013 | /* we need to renumber the servers placed | |
1014 | after the removed one, for Lua (custom policies) */ | |
1015 | it->first = idx++; | |
1016 | it++; | |
1017 | } | |
1018 | else if (it->second == server) { | |
1019 | it = d_servers.erase(it); | |
1020 | found = true; | |
1021 | } else { | |
1022 | idx++; | |
1023 | it++; | |
1024 | } | |
1025 | } | |
1026 | } | |
1027 | ||
1028 | private: | |
be05aa91 | 1029 | ServerPolicy::NumberedServerVector d_servers; |
a1b1a29d | 1030 | pthread_rwlock_t d_lock; |
7e687744 | 1031 | bool d_useECS{false}; |
886e2cf2 RG |
1032 | }; |
1033 | using pools_t=map<std::string,std::shared_ptr<ServerPool>>; | |
742c079a | 1034 | void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy); |
886e2cf2 RG |
1035 | void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server); |
1036 | void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server); | |
1037 | ||
42fae326 | 1038 | struct CarbonConfig |
1039 | { | |
d617b22c | 1040 | ComboAddress server; |
813b0ba9 | 1041 | std::string namespace_name; |
42fae326 | 1042 | std::string ourname; |
813b0ba9 | 1043 | std::string instance_name; |
d617b22c | 1044 | unsigned int interval; |
42fae326 | 1045 | }; |
1046 | ||
ca404e94 RG |
/* Flag values for the 16-bit EDNS header flags word. */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0,
  /* highest bit of the 16-bit word (0x8000 == 32768);
     NOTE(review): presumably the "DNSSEC OK" bit -- confirm against the EDNS specification */
  EDNS_HEADER_FLAG_DO = 1 << 15
};
1051 | ||
4d5959e6 RG |
1052 | struct DNSDistRuleAction |
1053 | { | |
1054 | std::shared_ptr<DNSRule> d_rule; | |
1055 | std::shared_ptr<DNSAction> d_action; | |
1056 | boost::uuids::uuid d_id; | |
f8a222ac | 1057 | uint64_t d_creationOrder; |
4d5959e6 RG |
1058 | }; |
1059 | ||
1060 | struct DNSDistResponseRuleAction | |
1061 | { | |
1062 | std::shared_ptr<DNSRule> d_rule; | |
1063 | std::shared_ptr<DNSResponseAction> d_action; | |
1064 | boost::uuids::uuid d_id; | |
f8a222ac | 1065 | uint64_t d_creationOrder; |
4d5959e6 RG |
1066 | }; |
1067 | ||
71c94675 | 1068 | extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT; |
dd46e5e3 | 1069 | extern DNSAction::Action g_dynBlockAction; |
71c94675 | 1070 | |
d617b22c | 1071 | extern GlobalStateHolder<vector<CarbonConfig> > g_carbon; |
ecbe9133 | 1072 | extern GlobalStateHolder<ServerPolicy> g_policy; |
1073 | extern GlobalStateHolder<servers_t> g_dstates; | |
886e2cf2 | 1074 | extern GlobalStateHolder<pools_t> g_pools; |
4d5959e6 RG |
1075 | extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions; |
1076 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions; | |
1077 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions; | |
2d4783a8 | 1078 | extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions; |
638184e9 | 1079 | extern GlobalStateHolder<NetmaskGroup> g_ACL; |
2e72cc0e | 1080 | |
ecbe9133 | 1081 | extern ComboAddress g_serverControl; // not changed during runtime |
1082 | ||
f0e4dcba | 1083 | extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX) |
a227f47d | 1084 | extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals; |
fbf14b03 | 1085 | extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals; |
6e9fd124 | 1086 | extern std::vector<std::unique_ptr<ClientState>> g_frontends; |
6ad8b29a | 1087 | extern bool g_truncateTC; |
b29edbee | 1088 | extern bool g_fixupCase; |
3f6d07a4 RG |
1089 | extern int g_tcpRecvTimeout; |
1090 | extern int g_tcpSendTimeout; | |
e0b5e49d | 1091 | extern int g_udpTimeout; |
e41f8165 RG |
1092 | extern uint16_t g_maxOutstanding; |
1093 | extern std::atomic<bool> g_configurationDone; | |
6c1ca990 RG |
1094 | extern uint64_t g_maxTCPClientThreads; |
1095 | extern uint64_t g_maxTCPQueuedConnections; | |
9396d955 RG |
1096 | extern size_t g_maxTCPQueriesPerConn; |
1097 | extern size_t g_maxTCPConnectionDuration; | |
1098 | extern size_t g_maxTCPConnectionsPerClient; | |
886e2cf2 | 1099 | extern std::atomic<uint16_t> g_cacheCleaningDelay; |
f65ea0c2 | 1100 | extern std::atomic<uint16_t> g_cacheCleaningPercentage; |
1ea747c0 | 1101 | extern uint32_t g_staleCacheEntriesTTL; |
56d68fad RG |
1102 | extern bool g_apiReadWrite; |
1103 | extern std::string g_apiConfigDirectory; | |
26a3cdb7 | 1104 | extern bool g_servFailOnNoPolicy; |
36e763fa | 1105 | extern uint32_t g_hashperturb; |
edbda1ad | 1106 | extern bool g_useTCPSinglePipe; |
cff9aa03 | 1107 | extern uint16_t g_downstreamTCPCleanupInterval; |
0beaa5c8 | 1108 | extern size_t g_udpVectorSize; |
53c57da7 | 1109 | extern bool g_preserveTrailingData; |
0dffe9e3 | 1110 | extern bool g_allowEmptyResponse; |
32b86928 | 1111 | extern bool g_roundrobinFailOnNoServer; |
2b4287d4 | 1112 | extern double g_consistentHashBalancingFactor; |
ca404e94 | 1113 | |
87b515ed RG |
1114 | #ifdef HAVE_EBPF |
1115 | extern shared_ptr<BPFFilter> g_defaultBPFFilter; | |
8429ad04 | 1116 | extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters; |
87b515ed RG |
1117 | #endif /* HAVE_EBPF */ |
1118 | ||
0beaa5c8 RG |
1119 | struct LocalHolders |
1120 | { | |
2d4783a8 | 1121 | LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal()) |
0beaa5c8 RG |
1122 | { |
1123 | } | |
1124 | ||
1125 | LocalStateHolder<NetmaskGroup> acl; | |
1126 | LocalStateHolder<ServerPolicy> policy; | |
4d5959e6 RG |
1127 | LocalStateHolder<vector<DNSDistRuleAction> > rulactions; |
1128 | LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions; | |
2d4783a8 | 1129 | LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions; |
0beaa5c8 RG |
1130 | LocalStateHolder<servers_t> servers; |
1131 | LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock; | |
1132 | LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock; | |
1133 | LocalStateHolder<pools_t> pools; | |
1134 | }; | |
1135 | ||
ecbe9133 | 1136 | struct dnsheader; |
1137 | ||
1138 | void controlThread(int fd, ComboAddress local); | |
886e2cf2 RG |
1139 | std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName); |
1140 | std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName); | |
be05aa91 | 1141 | ServerPolicy::NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName); |
da4e7813 | 1142 | |
be05aa91 | 1143 | std::shared_ptr<DownstreamState> firstAvailable(const ServerPolicy::NumberedServerVector& servers, const DNSQuestion* dq); |
ecbe9133 | 1144 | |
be05aa91 RG |
1145 | std::shared_ptr<DownstreamState> leastOutstanding(const ServerPolicy::NumberedServerVector& servers, const DNSQuestion* dq); |
1146 | std::shared_ptr<DownstreamState> wrandom(const ServerPolicy::NumberedServerVector& servers, const DNSQuestion* dq); | |
1147 | std::shared_ptr<DownstreamState> whashed(const ServerPolicy::NumberedServerVector& servers, const DNSQuestion* dq); | |
1148 | std::shared_ptr<DownstreamState> chashed(const ServerPolicy::NumberedServerVector& servers, const DNSQuestion* dq); | |
1149 | std::shared_ptr<DownstreamState> roundrobin(const ServerPolicy::NumberedServerVector& servers, const DNSQuestion* dq); | |
e7c732b8 | 1150 | |
80dbd7d2 CHB |
1151 | struct WebserverConfig |
1152 | { | |
1153 | std::string password; | |
1154 | std::string apiKey; | |
1155 | boost::optional<std::map<std::string, std::string> > customHeaders; | |
1156 | std::mutex lock; | |
1157 | }; | |
1158 | ||
32c97b56 CHB |
1159 | void setWebserverAPIKey(const boost::optional<std::string> apiKey); |
1160 | void setWebserverPassword(const std::string& password); | |
1161 | void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders); | |
1162 | ||
80dbd7d2 | 1163 | void dnsdistWebserverThread(int sock, const ComboAddress& local); |
9b73b71c | 1164 | void tcpAcceptorThread(void* p); |
fbf14b03 RG |
1165 | #ifdef HAVE_DNS_OVER_HTTPS |
1166 | void dohThread(ClientState* cs); | |
1167 | #endif /* HAVE_DNS_OVER_HTTPS */ | |
80a216c9 | 1168 | |
f758857a | 1169 | void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects |
1170 | void setLuaSideEffect(); // set to report a side effect, cancelling all _no_ side effect calls | |
1171 | bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect | |
1172 | void resetLuaSideEffect(); // reset to indeterminate state | |
11e1e08b | 1173 | |
e7c732b8 | 1174 | bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed); |
3e425868 | 1175 | bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted); |
2a28db86 | 1176 | bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop); |
4ab01344 | 1177 | |
0beaa5c8 | 1178 | bool checkQueryHeaders(const struct dnsheader* dh); |
fcffc585 | 1179 | |
6e9fd124 | 1180 | extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals; |
43234e76 | 1181 | int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response); |
4ab01344 | 1182 | boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp); |
9f4eb5cc | 1183 | |
18f707fa | 1184 | bool addXPF(DNSQuestion& dq, uint16_t optionCode); |
5cc8371b | 1185 | |
555970c9 RG |
1186 | uint16_t getRandomDNSID(); |
1187 | ||
9f4eb5cc RG |
1188 | #include "dnsdist-snmp.hh" |
1189 | ||
1190 | extern bool g_snmpEnabled; | |
1191 | extern bool g_snmpTrapsEnabled; | |
1192 | extern DNSDistSNMPAgent* g_snmpAgent; | |
e7c732b8 RG |
1193 | extern bool g_addEDNSToSelfGeneratedResponses; |
1194 | ||
83fe2c55 | 1195 | extern std::set<std::string> g_capabilitiesToRetain; |
8179b6d6 RG |
/* don't accept UDP queries larger than this value */
static const uint16_t s_udpIncomingBufferSize = 1500;
/* don't cache responses larger than this value */
static const size_t s_maxPacketCacheEntrySize = 4096;
4ab01344 | 1198 | |
3e425868 RG |
/* Outcome reported by processQuery(); the enumerator order (and therefore the
   underlying values 0, 1, 2) is kept as is. */
enum class ProcessQueryResult {
  Drop,
  SendAnswer,
  PassToBackend
};
1200 | ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend); | |
4ab01344 | 1201 | |
d0ae6360 RG |
1202 | DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP); |
1203 | void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname); | |
fbf14b03 RG |
1204 | |
1205 | int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state); | |
1206 | ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false); |