pdns/dnsdist.hh

   1 /*
   2  * This file is part of PowerDNS or dnsdist.
   3  * Copyright -- PowerDNS.COM B.V. and its contributors
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of version 2 of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * In addition, for the avoidance of any doubt, permission is granted to
  10  * link this program with OpenSSL and to (re)distribute the binaries
  11  * produced as the result of such linking.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22 #pragma once
  23 #include "config.h"
  24 #include "ext/luawrapper/include/LuaContext.hpp"
  25
  26 #include <atomic>
  27 #include <mutex>
  28 #include <string>
  29 #include <thread>
  30 #include <time.h>
  31 #include <unistd.h>
  32 #include <unordered_map>
  33
  34 #include <boost/variant.hpp>
  35
  36 #include "bpf-filter.hh"
  37 #include "capabilities.hh"
  38 #include "circular_buffer.hh"
  39 #include "dnscrypt.hh"
  40 #include "dnsdist-cache.hh"
  41 #include "dnsdist-dynbpf.hh"
  42 #include "dnsname.hh"
  43 #include "doh.hh"
  44 #include "ednsoptions.hh"
  45 #include "gettime.hh"
  46 #include "iputils.hh"
  47 #include "misc.hh"
  48 #include "mplexer.hh"
  49 #include "sholder.hh"
  50 #include "tcpiohandler.hh"
  51 #include "uuid-utils.hh"
  52
/* Defined in another translation unit.
   NOTE(review): judging by the name, presumably the entry point of the thread
   exporting metrics to Carbon — confirm against the definition. */
void carbonDumpThread();
/* Statistics callback: it backs the "uptime" entry of DNSDistStats::entries. */
uint64_t uptimeOfProcess(const std::string& str);

/* Process-wide values read by the DNSQuestion constructor: the first two seed
   DNSQuestion::ecsPrefixLength (V4 when the remote address family is AF_INET,
   V6 otherwise), the third seeds DNSQuestion::ecsOverride. */
extern uint16_t g_ECSSourcePrefixV4;
extern uint16_t g_ECSSourcePrefixV6;
extern bool g_ECSOverride;

/* String key/value pairs; DNSQuestion and IDState hold one through their qTag member. */
typedef std::unordered_map<string, string> QTag;
  61
  62 struct DNSQuestion
  63 {
  64   DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
  65     qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
  66     const uint16_t* flags = getFlagsFromDNSHeader(dh);
  67     origFlags = *flags;
  68   }
  69   DNSQuestion(const DNSQuestion&) = delete;
  70   DNSQuestion& operator=(const DNSQuestion&) = delete;
  71   DNSQuestion(DNSQuestion&&) = default;
  72
  73 #ifdef HAVE_PROTOBUF
  74   boost::optional<boost::uuids::uuid> uniqueId;
  75 #endif
  76   Netmask ecs;
  77   boost::optional<Netmask> subnet;
  78   std::string sni; /* Server Name Indication, if any (DoT or DoH) */
  79   std::string poolname;
  80   const DNSName* qname{nullptr};
  81   const ComboAddress* local{nullptr};
  82   const ComboAddress* remote{nullptr};
  83   std::shared_ptr<QTag> qTag{nullptr};
  84   std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
  85   std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
  86   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
  87   struct dnsheader* dh{nullptr};
  88   const struct timespec* queryTime{nullptr};
  89   struct DOHUnit* du{nullptr};
  90   size_t size;
  91   unsigned int consumed{0};
  92   int delayMsec{0};
  93   boost::optional<uint32_t> tempFailureTTL;
  94   uint32_t cacheKeyNoECS;
  95   uint32_t cacheKey;
  96   const uint16_t qtype;
  97   const uint16_t qclass;
  98   uint16_t len;
  99   uint16_t ecsPrefixLength;
 100   uint16_t origFlags;
 101   uint8_t ednsRCode{0};
 102   const bool tcp;
 103   bool skipCache{false};
 104   bool ecsOverride;
 105   bool useECS{true};
 106   bool addXPF{true};
 107   bool ecsSet{false};
 108   bool ecsAdded{false};
 109   bool ednsAdded{false};
 110   bool useZeroScope{false};
 111   bool dnssecOK{false};
 112 };
 113
 114 struct DNSResponse : DNSQuestion
 115 {
 116   DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
 117     DNSQuestion(name, type, class_, consumed, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
 118   DNSResponse(const DNSResponse&) = delete;
 119   DNSResponse& operator=(const DNSResponse&) = delete;
 120   DNSResponse(DNSResponse&&) = default;
 121 };
 122
/* so what could you do:
   drop,
   fake up nxdomain,
   provide actual answer,
   allow and stop processing,
   continue processing,
   modify header:    (servfail|refused|notimp), set TC=1,
   send to pool */
 131
 132 class DNSAction
 133 {
 134 public:
 135   enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse };
 136   static std::string typeToString(const Action& action)
 137   {
 138     switch(action) {
 139     case Action::Drop:
 140       return "Drop";
 141     case Action::Nxdomain:
 142       return "Send NXDomain";
 143     case Action::Refused:
 144       return "Send Refused";
 145     case Action::Spoof:
 146       return "Spoof an answer";
 147     case Action::Allow:
 148       return "Allow";
 149     case Action::HeaderModify:
 150       return "Modify the header";
 151     case Action::Pool:
 152       return "Route to a pool";
 153     case Action::Delay:
 154       return "Delay";
 155     case Action::Truncate:
 156       return "Truncate over UDP";
 157     case Action::ServFail:
 158       return "Send ServFail";
 159     case Action::None:
 160     case Action::NoOp:
 161       return "Do nothing";
 162     case Action::NoRecurse:
 163       return "Set rd=0";
 164     }
 165
 166     return "Unknown";
 167   }
 168
 169   virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
 170   virtual ~DNSAction()
 171   {
 172   }
 173   virtual string toString() const = 0;
 174   virtual std::map<string, double> getStats() const
 175   {
 176     return {{}};
 177   }
 178 };
 179
 180 class DNSResponseAction
 181 {
 182 public:
 183   enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
 184   virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
 185   virtual ~DNSResponseAction()
 186   {
 187   }
 188   virtual string toString() const = 0;
 189 };
 190
 191 struct DynBlock
 192 {
 193   DynBlock(): action(DNSAction::Action::None), warning(false)
 194   {
 195   }
 196
 197   DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
 198   {
 199   }
 200
 201   DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
 202   {
 203     blocks.store(rhs.blocks);
 204   }
 205
 206   DynBlock& operator=(const DynBlock& rhs)
 207   {
 208     reason=rhs.reason;
 209     until=rhs.until;
 210     domain=rhs.domain;
 211     action=rhs.action;
 212     blocks.store(rhs.blocks);
 213     warning=rhs.warning;
 214     return *this;
 215   }
 216
 217   string reason;
 218   struct timespec until;
 219   DNSName domain;
 220   DNSAction::Action action;
 221   mutable std::atomic<unsigned int> blocks;
 222   bool warning;
 223 };
 224
/* Global tree of DynBlock entries indexed by netmask; its size is what the
   "dyn-block-nmg-size" entry of DNSDistStats::entries reports. */
extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;

/* NOTE(review): presumably the list of runtime configuration changes along
   with the time they were made — confirm against the definition. */
extern vector<pair<struct timeval, std::string> > g_confDelta;

/* Statistics callback: it backs the "latency-count" entry of DNSDistStats::entries. */
extern uint64_t getLatencyCount(const std::string&);
 230
 231 struct DNSDistStats
 232 {
 233   using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
 234   stat_t responses{0};
 235   stat_t servfailResponses{0};
 236   stat_t queries{0};
 237   stat_t frontendNXDomain{0};
 238   stat_t frontendServFail{0};
 239   stat_t frontendNoError{0};
 240   stat_t nonCompliantQueries{0};
 241   stat_t nonCompliantResponses{0};
 242   stat_t rdQueries{0};
 243   stat_t emptyQueries{0};
 244   stat_t aclDrops{0};
 245   stat_t dynBlocked{0};
 246   stat_t ruleDrop{0};
 247   stat_t ruleNXDomain{0};
 248   stat_t ruleRefused{0};
 249   stat_t ruleServFail{0};
 250   stat_t selfAnswered{0};
 251   stat_t downstreamTimeouts{0};
 252   stat_t downstreamSendErrors{0};
 253   stat_t truncFail{0};
 254   stat_t noPolicy{0};
 255   stat_t cacheHits{0};
 256   stat_t cacheMisses{0};
 257   stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
 258   stat_t securityStatus{0};
 259
 260   double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
 261   typedef std::function<uint64_t(const std::string&)> statfunction_t;
 262   typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
 263   std::vector<std::pair<std::string, entry_t>> entries{
 264     {"responses", &responses},
 265     {"servfail-responses", &servfailResponses},
 266     {"queries", &queries},
 267     {"frontend-nxdomain", &frontendNXDomain},
 268     {"frontend-servfail", &frontendServFail},
 269     {"frontend-noerror", &frontendNoError},
 270     {"acl-drops", &aclDrops},
 271     {"rule-drop", &ruleDrop},
 272     {"rule-nxdomain", &ruleNXDomain},
 273     {"rule-refused", &ruleRefused},
 274     {"rule-servfail", &ruleServFail},
 275     {"self-answered", &selfAnswered},
 276     {"downstream-timeouts", &downstreamTimeouts},
 277     {"downstream-send-errors", &downstreamSendErrors},
 278     {"trunc-failures", &truncFail},
 279     {"no-policy", &noPolicy},
 280     {"latency0-1", &latency0_1},
 281     {"latency1-10", &latency1_10},
 282     {"latency10-50", &latency10_50},
 283     {"latency50-100", &latency50_100},
 284     {"latency100-1000", &latency100_1000},
 285     {"latency-slow", &latencySlow},
 286     {"latency-avg100", &latencyAvg100},
 287     {"latency-avg1000", &latencyAvg1000},
 288     {"latency-avg10000", &latencyAvg10000},
 289     {"latency-avg1000000", &latencyAvg1000000},
 290     {"uptime", uptimeOfProcess},
 291     {"real-memory-usage", getRealMemoryUsage},
 292     {"special-memory-usage", getSpecialMemoryUsage},
 293     {"noncompliant-queries", &nonCompliantQueries},
 294     {"noncompliant-responses", &nonCompliantResponses},
 295     {"rdqueries", &rdQueries},
 296     {"empty-queries", &emptyQueries},
 297     {"cache-hits", &cacheHits},
 298     {"cache-misses", &cacheMisses},
 299     {"cpu-user-msec", getCPUTimeUser},
 300     {"cpu-sys-msec", getCPUTimeSystem},
 301     {"fd-usage", getOpenFileDescriptors},
 302     {"dyn-blocked", &dynBlocked},
 303     {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
 304     {"security-status", &securityStatus},
 305     // Latency histogram
 306     {"latency-sum", &latencySum},
 307     {"latency-count", getLatencyCount},
 308   };
 309 };
 310
 311 // Metric types for Prometheus
 312 enum class PrometheusMetricType: int {
 313     counter = 1,
 314     gauge = 2
 315 };
 316
 317 // Keeps additional information about metrics
 318 struct MetricDefinition {
 319   MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
 320   }
 321
 322   MetricDefinition() = default;
 323
 324   // Metric description
 325   std::string description;
 326   // Metric type for Prometheus
 327   PrometheusMetricType prometheusType;
 328 };
 329
 330 struct MetricDefinitionStorage {
 331   // Return metric definition by name
 332   bool getMetricDetails(std::string metricName, MetricDefinition& metric) {
 333   auto metricDetailsIter = metrics.find(metricName);
 334
 335   if (metricDetailsIter == metrics.end()) {
 336     return false;
 337   }
 338
 339   metric = metricDetailsIter->second;
 340     return true;
 341   };
 342
 343   // Return string representation of Prometheus metric type
 344   std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
 345     switch (metricType) {
 346       case PrometheusMetricType::counter:
 347         return "counter";
 348         break;
 349       case PrometheusMetricType::gauge:
 350         return "gauge";
 351         break;
 352       default:
 353         return "";
 354         break;
 355     }
 356   };
 357
 358   std::map<std::string, MetricDefinition> metrics = {
 359     { "responses",              MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
 360     { "servfail-responses",     MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
 361     { "queries",                MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
 362     { "frontend-nxdomain",      MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
 363     { "frontend-servfail",      MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
 364     { "frontend-noerror",       MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
 365     { "acl-drops",              MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
 366     { "rule-drop",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
 367     { "rule-nxdomain",          MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
 368     { "rule-refused",           MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
 369     { "rule-servfail",          MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
 370     { "self-answered",          MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
 371     { "downstream-timeouts",    MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
 372     { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
 373     { "trunc-failures",         MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
 374     { "no-policy",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
 375     { "latency0-1",             MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
 376     { "latency1-10",            MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
 377     { "latency10-50",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
 378     { "latency50-100",          MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
 379     { "latency100-1000",        MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
 380     { "latency-slow",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
 381     { "latency-avg100",         MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 100 packets")},
 382     { "latency-avg1000",        MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000 packets")},
 383     { "latency-avg10000",       MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 10000 packets")},
 384     { "latency-avg1000000",     MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000000 packets")},
 385     { "uptime",                 MetricDefinition(PrometheusMetricType::gauge,   "Uptime of the dnsdist process in seconds")},
 386     { "real-memory-usage",      MetricDefinition(PrometheusMetricType::gauge,   "Current memory usage in bytes")},
 387     { "noncompliant-queries",   MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
 388     { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
 389     { "rdqueries",              MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
 390     { "empty-queries",          MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
 391     { "cache-hits",             MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
 392     { "cache-misses",           MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
 393     { "cpu-user-msec",          MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
 394     { "cpu-sys-msec",           MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
 395     { "fd-usage",               MetricDefinition(PrometheusMetricType::gauge,   "Number of currently used file descriptors")},
 396     { "dyn-blocked",            MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
 397     { "dyn-block-nmg-size",     MetricDefinition(PrometheusMetricType::gauge,   "Number of dynamic blocks entries") },
 398     { "security-status",        MetricDefinition(PrometheusMetricType::gauge,   "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
 399     // Latency histogram
 400     { "latency-sum",            MetricDefinition(PrometheusMetricType::counter, "Total response time in milliseconds")},
 401     { "latency-count",          MetricDefinition(PrometheusMetricType::counter, "Number of queries contributing to response time histogram")},
 402   };
 403 };
 404
/* the process-wide metric definitions table (see MetricDefinitionStorage) */
extern MetricDefinitionStorage g_metricDefinitions;
/* the process-wide statistics (see DNSDistStats) */
extern struct DNSDistStats g_stats;
/* NOTE(review): presumably accounts one response latency, given in
   microseconds, into the latency statistics — confirm against the definition. */
void doLatencyStats(double udiff);
 408
 409
 410 struct StopWatch
 411 {
 412   StopWatch(bool realTime=false): d_needRealTime(realTime)
 413   {
 414   }
 415   struct timespec d_start{0,0};
 416   bool d_needRealTime{false};
 417
 418   void start() {
 419     if(gettime(&d_start, d_needRealTime) < 0)
 420       unixDie("Getting timestamp");
 421
 422   }
 423
 424   void set(const struct timespec& from) {
 425     d_start = from;
 426   }
 427
 428   double udiff() const {
 429     struct timespec now;
 430     if(gettime(&now, d_needRealTime) < 0)
 431       unixDie("Getting timestamp");
 432
 433     return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
 434   }
 435
 436   double udiffAndSet() {
 437     struct timespec now;
 438     if(gettime(&now, d_needRealTime) < 0)
 439       unixDie("Getting timestamp");
 440
 441     auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
 442     d_start = now;
 443     return ret;
 444   }
 445
 446 };
 447
 448 class BasicQPSLimiter
 449 {
 450 public:
 451   BasicQPSLimiter()
 452   {
 453   }
 454
 455   BasicQPSLimiter(unsigned int burst): d_tokens(burst)
 456   {
 457     d_prev.start();
 458   }
 459
 460   bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
 461   {
 462     auto delta = d_prev.udiffAndSet();
 463
 464     if(delta > 0.0) // time, frequently, does go backwards..
 465       d_tokens += 1.0 * rate * (delta/1000000.0);
 466
 467     if(d_tokens > burst) {
 468       d_tokens = burst;
 469     }
 470
 471     bool ret=false;
 472     if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
 473       ret=true;
 474       --d_tokens;
 475     }
 476
 477     return ret;
 478   }
 479
 480   bool seenSince(const struct timespec& cutOff) const
 481   {
 482     return cutOff < d_prev.d_start;
 483   }
 484
 485 protected:
 486   mutable StopWatch d_prev;
 487   mutable double d_tokens;
 488 };
 489
 490 class QPSLimiter : public BasicQPSLimiter
 491 {
 492 public:
 493   QPSLimiter(): BasicQPSLimiter()
 494   {
 495   }
 496
 497   QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
 498   {
 499     d_prev.start();
 500   }
 501
 502   unsigned int getRate() const
 503   {
 504     return d_passthrough ? 0 : d_rate;
 505   }
 506
 507   int getPassed() const
 508   {
 509     return d_passed;
 510   }
 511
 512   int getBlocked() const
 513   {
 514     return d_blocked;
 515   }
 516
 517   bool check() const // this is not quite fair
 518   {
 519     if (d_passthrough) {
 520       return true;
 521     }
 522
 523     bool ret = BasicQPSLimiter::check(d_rate, d_burst);
 524     if (ret) {
 525       d_passed++;
 526     }
 527     else {
 528       d_blocked++;
 529     }
 530
 531     return ret;
 532   }
 533 private:
 534   mutable unsigned int d_passed{0};
 535   mutable unsigned int d_blocked{0};
 536   unsigned int d_rate;
 537   unsigned int d_burst;
 538   bool d_passthrough{true};
 539 };
 540
 541 struct ClientState;
 542
 543 struct IDState
 544 {
 545   IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
 546   IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
 547   {
 548     usageIndicator.store(orig.usageIndicator.load());
 549     origFD = orig.origFD;
 550     origID = orig.origID;
 551     delayMsec = orig.delayMsec;
 552     tempFailureTTL = orig.tempFailureTTL;
 553   }
 554
 555   static const int64_t unusedIndicator = -1;
 556
 557   static bool isInUse(int64_t usageIndicator)
 558   {
 559     return usageIndicator != unusedIndicator;
 560   }
 561
 562   bool isInUse() const
 563   {
 564     return usageIndicator != unusedIndicator;
 565   }
 566
 567   /* return true if the value has been successfully replaced meaning that
 568      no-one updated the usage indicator in the meantime */
 569   bool tryMarkUnused(int64_t expectedUsageIndicator)
 570   {
 571     return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
 572   }
 573
 574   /* mark as unused no matter what, return true if the state was in use before */
 575   bool markAsUsed()
 576   {
 577     auto currentGeneration = generation++;
 578     return markAsUsed(currentGeneration);
 579   }
 580
 581   /* mark as unused no matter what, return true if the state was in use before */
 582   bool markAsUsed(int64_t currentGeneration)
 583   {
 584     int64_t oldUsage = usageIndicator.exchange(currentGeneration);
 585     return oldUsage != unusedIndicator;
 586   }
 587
 588   /* We use this value to detect whether this state is in use.
 589      For performance reasons we don't want to use a lock here, but that means
 590      we need to be very careful when modifying this value. Modifications happen
 591      from:
 592      - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
 593        then picking one of the states associated to this backend (via the idOffset).
 594        Most of the time this state should not be in use and usageIndicator is -1, but we
 595        might not yet have received a response for the query previously associated to this
 596        state, meaning that we will 'reuse' this state and erase the existing state.
 597        If we ever receive a response for this state, it will be discarded. This is
 598        mostly fine for UDP except that we still need to be careful in order to miss
 599        the 'outstanding' counters, which should only be increased when we are picking
 600        an empty state, and not when reusing ;
 601        For DoH, though, we have dynamically allocated a DOHUnit object that needs to
 602        be freed, as well as internal objects internals to libh2o.
 603      - one of the UDP receiver threads receiving a response from a backend, picking
 604        the corresponding state and sending the response to the client ;
 605      - the 'healthcheck' thread scanning the states to actively discover timeouts,
 606        mostly to keep some counters like the 'outstanding' one sane.
 607      We previously based that logic on the origFD (FD on which the query was received,
 608      and therefore from where the response should be sent) but this suffered from an
 609      ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
 610      same value since we only have so much incoming sockets:
 611      - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
 612      - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
 613        qtype and qclass match
 614      - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
 615      - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
 616        5, except it's not the same 5 anymore and it overrides a fresh state.
 617      We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
 618      wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
 619      when the state is unused and the value of our counter otherwise.
 620   */
 621   std::atomic<int64_t> usageIndicator{unusedIndicator};  // set to unusedIndicator to indicate this state is empty   // 8
 622   std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue    // 4
 623   ComboAddress origRemote;                                    // 28
 624   ComboAddress origDest;                                      // 28
 625   StopWatch sentTime;                                         // 16
 626   DNSName qname;                                              // 80
 627   std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
 628 #ifdef HAVE_PROTOBUF
 629   boost::optional<boost::uuids::uuid> uniqueId;
 630 #endif
 631   boost::optional<Netmask> subnet{boost::none};
 632   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
 633   std::shared_ptr<QTag> qTag{nullptr};
 634   const ClientState* cs{nullptr};
 635   DOHUnit* du{nullptr};
 636   uint32_t cacheKey;                                          // 4
 637   uint32_t cacheKeyNoECS;                                     // 4
 638   uint16_t age;                                               // 4
 639   uint16_t qtype;                                             // 2
 640   uint16_t qclass;                                            // 2
 641   uint16_t origID;                                            // 2
 642   uint16_t origFlags;                                         // 2
 643   int origFD{-1};
 644   int delayMsec;
 645   boost::optional<uint32_t> tempFailureTTL;
 646   bool ednsAdded{false};
 647   bool ecsAdded{false};
 648   bool skipCache{false};
 649   bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
 650   bool dnssecOK{false};
 651   bool useZeroScope;
 652 };
 653
 654 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
 655 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
 656 struct QueryCount {
 657   QueryCount()
 658   {
 659     pthread_rwlock_init(&queryLock, nullptr);
 660   }
 661   QueryCountRecords records;
 662   QueryCountFilter filter;
 663   pthread_rwlock_t queryLock;
 664   bool enabled{false};
 665 };
 666
 667 extern QueryCount g_qcount;
 668
 669 struct ClientState
 670 {
 671   ClientState(const ComboAddress& local_, bool isTCP, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP), reuseport(doReusePort)
 672   {
 673   }
 674
 675   std::set<int> cpus;
 676   ComboAddress local;
 677   std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
 678   std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
 679   std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
 680   std::string interface;
 681   std::atomic<uint64_t> queries{0};
 682   mutable std::atomic<uint64_t> responses{0};
 683   std::atomic<uint64_t> tcpDiedReadingQuery{0};
 684   std::atomic<uint64_t> tcpDiedSendingResponse{0};
 685   std::atomic<uint64_t> tcpGaveUp{0};
 686   std::atomic<uint64_t> tcpClientTimeouts{0};
 687   std::atomic<uint64_t> tcpDownstreamTimeouts{0};
 688   std::atomic<uint64_t> tcpCurrentConnections{0};
 689   std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
 690   std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
 691
 692   std::atomic<double> tcpAvgQueriesPerConnection{0.0};
 693   /* in ms */
 694   std::atomic<double> tcpAvgConnectionDuration{0.0};
 695   int udpFD{-1};
 696   int tcpFD{-1};
 697   int fastOpenQueueSize{0};
 698   bool muted{false};
 699   bool tcp;
 700   bool reuseport;
 701   bool ready{false};
 702
 703   int getSocket() const
 704   {
 705     return udpFD != -1 ? udpFD : tcpFD;
 706   }
 707
 708   bool isUDP() const
 709   {
 710     return udpFD != -1;
 711   }
 712
 713   bool isTCP() const
 714   {
 715     return udpFD == -1;
 716   }
 717
 718   std::string getType() const
 719   {
 720     std::string result = udpFD != -1 ? "UDP" : "TCP";
 721
 722     if (dohFrontend) {
 723       result += " (DNS over HTTPS)";
 724     }
 725     else if (tlsFrontend) {
 726       result += " (DNS over TLS)";
 727     }
 728     else if (dnscryptCtx) {
 729       result += " (DNSCrypt)";
 730     }
 731
 732     return result;
 733   }
 734
 735 #ifdef HAVE_EBPF
 736   shared_ptr<BPFFilter> d_filter;
 737
 738   void detachFilter()
 739   {
 740     if (d_filter) {
 741       d_filter->removeSocket(getSocket());
 742       d_filter = nullptr;
 743     }
 744   }
 745
 746   void attachFilter(shared_ptr<BPFFilter> bpf)
 747   {
 748     detachFilter();
 749
 750     bpf->addSocket(getSocket());
 751     d_filter = bpf;
 752   }
 753 #endif /* HAVE_EBPF */
 754
 755   void updateTCPMetrics(size_t queries, uint64_t durationMs)
 756   {
 757     tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
 758     tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
 759   }
 760 };
 761
 762 class TCPClientCollection {
 763   std::vector<int> d_tcpclientthreads;
 764   std::atomic<uint64_t> d_numthreads{0};
 765   std::atomic<uint64_t> d_pos{0};
 766   std::atomic<uint64_t> d_queued{0};
 767   const uint64_t d_maxthreads{0};
 768   std::mutex d_mutex;
 769   int d_singlePipe[2];
 770   const bool d_useSinglePipe;
 771 public:
 772
 773   TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
 774
 775   {
 776     d_tcpclientthreads.reserve(maxThreads);
 777
 778     if (d_useSinglePipe) {
 779       if (pipe(d_singlePipe) < 0) {
 780         int err = errno;
 781         throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
 782       }
 783
 784       if (!setNonBlocking(d_singlePipe[0])) {
 785         int err = errno;
 786         close(d_singlePipe[0]);
 787         close(d_singlePipe[1]);
 788         throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
 789       }
 790
 791       if (!setNonBlocking(d_singlePipe[1])) {
 792         int err = errno;
 793         close(d_singlePipe[0]);
 794         close(d_singlePipe[1]);
 795         throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
 796       }
 797     }
 798   }
 799   int getThread()
 800   {
 801     uint64_t pos = d_pos++;
 802     ++d_queued;
 803     return d_tcpclientthreads[pos % d_numthreads];
 804   }
 805   bool hasReachedMaxThreads() const
 806   {
 807     return d_numthreads >= d_maxthreads;
 808   }
 809   uint64_t getThreadsCount() const
 810   {
 811     return d_numthreads;
 812   }
 813   uint64_t getQueuedCount() const
 814   {
 815     return d_queued;
 816   }
 817   void decrementQueuedCount()
 818   {
 819     --d_queued;
 820   }
 821   void addTCPClientThread();
 822 };
 823
 824 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
 825
 826 struct DownstreamState
 827 {
 828    typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
 829
 830   DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, size_t numberOfSockets);
 831   DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, 1) {}
 832   ~DownstreamState()
 833   {
 834     for (auto& fd : sockets) {
 835       if (fd >= 0) {
 836         close(fd);
 837         fd = -1;
 838       }
 839     }
 840   }
 841   boost::uuids::uuid id;
 842   std::set<unsigned int> hashes;
 843   mutable pthread_rwlock_t d_lock;
 844   std::vector<int> sockets;
 845   std::mutex socketsLock;
 846   std::mutex connectLock;
 847   std::unique_ptr<FDMultiplexer> mplexer{nullptr};
 848   std::thread tid;
 849   const ComboAddress remote;
 850   QPSLimiter qps;
 851   vector<IDState> idStates;
 852   const ComboAddress sourceAddr;
 853   checkfunc_t checkFunction;
 854   DNSName checkName{"a.root-servers.net."};
 855   QType checkType{QType::A};
 856   uint16_t checkClass{QClass::IN};
 857   std::atomic<uint64_t> idOffset{0};
 858   std::atomic<uint64_t> sendErrors{0};
 859   std::atomic<uint64_t> outstanding{0};
 860   std::atomic<uint64_t> reuseds{0};
 861   std::atomic<uint64_t> queries{0};
 862   std::atomic<uint64_t> responses{0};
 863   struct {
 864     std::atomic<uint64_t> sendErrors{0};
 865     std::atomic<uint64_t> reuseds{0};
 866     std::atomic<uint64_t> queries{0};
 867   } prev;
 868   std::atomic<uint64_t> tcpDiedSendingQuery{0};
 869   std::atomic<uint64_t> tcpDiedReadingResponse{0};
 870   std::atomic<uint64_t> tcpGaveUp{0};
 871   std::atomic<uint64_t> tcpReadTimeouts{0};
 872   std::atomic<uint64_t> tcpWriteTimeouts{0};
 873   std::atomic<uint64_t> tcpCurrentConnections{0};
 874   std::atomic<double> tcpAvgQueriesPerConnection{0.0};
 875   /* in ms */
 876   std::atomic<double> tcpAvgConnectionDuration{0.0};
 877   string name;
 878   size_t socketsOffset{0};
 879   double queryLoad{0.0};
 880   double dropRate{0.0};
 881   double latencyUsec{0.0};
 882   int order{1};
 883   int weight{1};
 884   int tcpConnectTimeout{5};
 885   int tcpRecvTimeout{30};
 886   int tcpSendTimeout{30};
 887   unsigned int checkInterval{1};
 888   unsigned int lastCheck{0};
 889   const unsigned int sourceItf{0};
 890   uint16_t retries{5};
 891   uint16_t xpfRRCode{0};
 892   uint16_t checkTimeout{1000}; /* in milliseconds */
 893   uint8_t currentCheckFailures{0};
 894   uint8_t consecutiveSuccessfulChecks{0};
 895   uint8_t maxCheckFailures{1};
 896   uint8_t minRiseSuccesses{1};
 897   StopWatch sw;
 898   set<string> pools;
 899   enum class Availability { Up, Down, Auto} availability{Availability::Auto};
 900   bool mustResolve{false};
 901   bool upStatus{false};
 902   bool useECS{false};
 903   bool setCD{false};
 904   bool disableZeroScope{false};
 905   std::atomic<bool> connected{false};
 906   std::atomic_flag threadStarted;
 907   bool tcpFastOpen{false};
 908   bool ipBindAddrNoPort{true};
 909
 910   bool isUp() const
 911   {
 912     if(availability == Availability::Down)
 913       return false;
 914     if(availability == Availability::Up)
 915       return true;
 916     return upStatus;
 917   }
 918   void setUp() { availability = Availability::Up; }
 919   void setDown() { availability = Availability::Down; }
 920   void setAuto() { availability = Availability::Auto; }
 921   string getName() const {
 922     if (name.empty()) {
 923       return remote.toStringWithPort();
 924     }
 925     return name;
 926   }
 927   string getNameWithAddr() const {
 928     if (name.empty()) {
 929       return remote.toStringWithPort();
 930     }
 931     return name + " (" + remote.toStringWithPort()+ ")";
 932   }
 933   string getStatus() const
 934   {
 935     string status;
 936     if(availability == DownstreamState::Availability::Up)
 937       status = "UP";
 938     else if(availability == DownstreamState::Availability::Down)
 939       status = "DOWN";
 940     else
 941       status = (upStatus ? "up" : "down");
 942     return status;
 943   }
 944   bool reconnect();
 945   void hash();
 946   void setId(const boost::uuids::uuid& newId);
 947   void setWeight(int newWeight);
 948
 949   void updateTCPMetrics(size_t queries, uint64_t durationMs)
 950   {
 951     tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
 952     tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
 953   }
 954 };
 955 using servers_t =vector<std::shared_ptr<DownstreamState>>;
 956
 957 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
 958
 959 void responderThread(std::shared_ptr<DownstreamState> state);
 960 extern std::mutex g_luamutex;
 961 extern LuaContext g_lua;
 962 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
 963
 964 class DNSRule
 965 {
 966 public:
 967   virtual ~DNSRule ()
 968   {
 969   }
 970   virtual bool matches(const DNSQuestion* dq) const =0;
 971   virtual string toString() const = 0;
 972   mutable std::atomic<uint64_t> d_matches{0};
 973 };
 974
/* Servers paired with an unsigned number (see NumberedVector). */
using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
/* Callable taking the numbered servers and a question, returning a (shared) server. */
typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
 977
 978 struct ServerPolicy
 979 {
 980   string name;
 981   policyfunc_t policy;
 982   bool isLua;
 983   std::string toString() const {
 984     return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
 985   }
 986 };
 987
 988 struct ServerPool
 989 {
 990   ServerPool()
 991   {
 992     pthread_rwlock_init(&d_lock, nullptr);
 993   }
 994
 995   const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
 996
 997   bool getECS() const
 998   {
 999     return d_useECS;
1000   }
1001
1002   void setECS(bool useECS)
1003   {
1004     d_useECS = useECS;
1005   }
1006
1007   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
1008   std::shared_ptr<ServerPolicy> policy{nullptr};
1009
1010   size_t countServers(bool upOnly)
1011   {
1012     size_t count = 0;
1013     ReadLock rl(&d_lock);
1014     for (const auto& server : d_servers) {
1015       if (!upOnly || std::get<1>(server)->isUp() ) {
1016         count++;
1017       }
1018     }
1019     return count;
1020   }
1021
1022   NumberedVector<shared_ptr<DownstreamState>> getServers()
1023   {
1024     NumberedVector<shared_ptr<DownstreamState>> result;
1025     {
1026       ReadLock rl(&d_lock);
1027       result = d_servers;
1028     }
1029     return result;
1030   }
1031
1032   void addServer(shared_ptr<DownstreamState>& server)
1033   {
1034     WriteLock wl(&d_lock);
1035     unsigned int count = (unsigned int) d_servers.size();
1036     d_servers.push_back(make_pair(++count, server));
1037     /* we need to reorder based on the server 'order' */
1038     std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1039       return a.second->order < b.second->order;
1040     });
1041     /* and now we need to renumber for Lua (custom policies) */
1042     size_t idx = 1;
1043     for (auto& serv : d_servers) {
1044       serv.first = idx++;
1045     }
1046   }
1047
1048   void removeServer(shared_ptr<DownstreamState>& server)
1049   {
1050     WriteLock wl(&d_lock);
1051     size_t idx = 1;
1052     bool found = false;
1053     for (auto it = d_servers.begin(); it != d_servers.end();) {
1054       if (found) {
1055         /* we need to renumber the servers placed
1056            after the removed one, for Lua (custom policies) */
1057         it->first = idx++;
1058         it++;
1059       }
1060       else if (it->second == server) {
1061         it = d_servers.erase(it);
1062         found = true;
1063       } else {
1064         idx++;
1065         it++;
1066       }
1067     }
1068   }
1069
1070 private:
1071   NumberedVector<shared_ptr<DownstreamState>> d_servers;
1072   pthread_rwlock_t d_lock;
1073   bool d_useECS{false};
1074 };
1075 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
1076 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
1077 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1078 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1079
/* Plain bundle of settings for one Carbon target.
   NOTE(review): the meaning of each field is only suggested by its name here,
   confirm against the code reading this struct (presumably carbonDumpThread()). */
struct CarbonConfig
{
  ComboAddress server;
  std::string namespace_name;
  std::string ourname;
  std::string instance_name;
  /* no default initializer: has to be set by whoever fills the struct.
     NOTE(review): unit not visible from here, presumably seconds — confirm */
  unsigned int interval;
};
1088
/* Flag values for the EDNS header. */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000 /* 32768: the most significant bit of a 16-bit value */
};
1093
/* Ties a rule to an action, together with an identifier and a creation order.
   NOTE(review): presumably the action is applied to the queries matching the
   rule — confirm against the code walking g_rulactions. */
struct DNSDistRuleAction
{
  std::shared_ptr<DNSRule> d_rule;
  std::shared_ptr<DNSAction> d_action;
  boost::uuids::uuid d_id;
  uint64_t d_creationOrder;
};
1101
/* Same layout as DNSDistRuleAction, except that the action is a
   DNSResponseAction instead of a DNSAction. */
struct DNSDistResponseRuleAction
{
  std::shared_ptr<DNSRule> d_rule;
  std::shared_ptr<DNSResponseAction> d_action;
  boost::uuids::uuid d_id;
  uint64_t d_creationOrder;
};
1109
1110 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1111 extern DNSAction::Action g_dynBlockAction;
1112
1113 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1114 extern GlobalStateHolder<ServerPolicy> g_policy;
1115 extern GlobalStateHolder<servers_t> g_dstates;
1116 extern GlobalStateHolder<pools_t> g_pools;
1117 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1118 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1119 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1120 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1121 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1122
1123 extern ComboAddress g_serverControl; // not changed during runtime
1124
1125 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
1126 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1127 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1128 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1129 extern bool g_truncateTC;
1130 extern bool g_fixupCase;
1131 extern int g_tcpRecvTimeout;
1132 extern int g_tcpSendTimeout;
1133 extern int g_udpTimeout;
1134 extern uint16_t g_maxOutstanding;
1135 extern std::atomic<bool> g_configurationDone;
1136 extern uint64_t g_maxTCPClientThreads;
1137 extern uint64_t g_maxTCPQueuedConnections;
1138 extern size_t g_maxTCPQueriesPerConn;
1139 extern size_t g_maxTCPConnectionDuration;
1140 extern size_t g_maxTCPConnectionsPerClient;
1141 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1142 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1143 extern bool g_verboseHealthChecks;
1144 extern uint32_t g_staleCacheEntriesTTL;
1145 extern bool g_apiReadWrite;
1146 extern std::string g_apiConfigDirectory;
1147 extern bool g_servFailOnNoPolicy;
1148 extern uint32_t g_hashperturb;
1149 extern bool g_useTCPSinglePipe;
1150 extern uint16_t g_downstreamTCPCleanupInterval;
1151 extern size_t g_udpVectorSize;
1152 extern bool g_preserveTrailingData;
1153 extern bool g_allowEmptyResponse;
1154 extern bool g_roundrobinFailOnNoServer;
1155
1156 #ifdef HAVE_EBPF
1157 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1158 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1159 #endif /* HAVE_EBPF */
1160
1161 struct LocalHolders
1162 {
1163   LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1164   {
1165   }
1166
1167   LocalStateHolder<NetmaskGroup> acl;
1168   LocalStateHolder<ServerPolicy> policy;
1169   LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1170   LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1171   LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1172   LocalStateHolder<servers_t> servers;
1173   LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1174   LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1175   LocalStateHolder<pools_t> pools;
1176 };
1177
1178 struct dnsheader;
1179
1180 void controlThread(int fd, ComboAddress local);
1181 vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
1182 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
1183 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
1184 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
1185
/* Server-selection functions, defined elsewhere. They all share the call
   signature of policyfunc_t: the numbered servers and the question go in,
   a (shared) server comes out. */
std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);

std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
1193
/* Settings of the web server.
   NOTE(review): 'lock' presumably protects the other members — confirm in
   the setWebserver*() functions. */
struct WebserverConfig
{
  std::string password;
  std::string apiKey;
  /* optional: may hold no map at all */
  boost::optional<std::map<std::string, std::string> > customHeaders;
  std::mutex lock;
};
1201
1202 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1203 void setWebserverPassword(const std::string& password);
1204 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1205
1206 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1207 void tcpAcceptorThread(void* p);
1208 #ifdef HAVE_DNS_OVER_HTTPS
1209 void dohThread(ClientState* cs);
1210 #endif /* HAVE_DNS_OVER_HTTPS */
1211
1212 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1213 void setLuaSideEffect();   // set to report a side effect, cancelling all _no_ side effect calls
1214 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1215 void resetLuaSideEffect(); // reset to indeterminate state
1216
1217 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
1218 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1219 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1220
1221 bool checkQueryHeaders(const struct dnsheader* dh);
1222
1223 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1224 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1225 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1226
1227 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1228
1229 uint16_t getRandomDNSID();
1230
1231 #include "dnsdist-snmp.hh"
1232
1233 extern bool g_snmpEnabled;
1234 extern bool g_snmpTrapsEnabled;
1235 extern DNSDistSNMPAgent* g_snmpAgent;
1236 extern bool g_addEDNSToSelfGeneratedResponses;
1237
1238 static const size_t s_udpIncomingBufferSize{1500};
1239
/* Possible outcomes of processQuery().
   NOTE(review): judging by the names, the caller should respectively drop the
   query, send back the answer already built, or forward the query to
   'selectedBackend' — confirm against the definition of processQuery(). */
enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1242
1243 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1244 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1245
1246 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1247 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);