pdns/dnsdist.hh

   1 /*
   2  * This file is part of PowerDNS or dnsdist.
   3  * Copyright -- PowerDNS.COM B.V. and its contributors
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of version 2 of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * In addition, for the avoidance of any doubt, permission is granted to
  10  * link this program with OpenSSL and to (re)distribute the binaries
  11  * produced as the result of such linking.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22 #pragma once
  23 #include "config.h"
  24 #include "ext/luawrapper/include/LuaContext.hpp"
  25
  26 #include <atomic>
  27 #include <mutex>
  28 #include <string>
  29 #include <thread>
  30 #include <time.h>
  31 #include <unistd.h>
  32 #include <unordered_map>
  33
  34 #include <boost/variant.hpp>
  35
  36 #include "bpf-filter.hh"
  37 #include "capabilities.hh"
  38 #include "circular_buffer.hh"
  39 #include "dnscrypt.hh"
  40 #include "dnsdist-cache.hh"
  41 #include "dnsdist-dynbpf.hh"
  42 #include "dnsname.hh"
  43 #include "doh.hh"
  44 #include "ednsoptions.hh"
  45 #include "gettime.hh"
  46 #include "iputils.hh"
  47 #include "misc.hh"
  48 #include "mplexer.hh"
  49 #include "sholder.hh"
  50 #include "tcpiohandler.hh"
  51 #include "uuid-utils.hh"
  52
  53 void carbonDumpThread();
  54 uint64_t uptimeOfProcess(const std::string& str);
  55
  56 extern uint16_t g_ECSSourcePrefixV4;
  57 extern uint16_t g_ECSSourcePrefixV6;
  58 extern bool g_ECSOverride;
  59
  60 typedef std::unordered_map<string, string> QTag;
  61
  62 struct DNSQuestion
  63 {
  64   DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
  65     qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
  66     const uint16_t* flags = getFlagsFromDNSHeader(dh);
  67     origFlags = *flags;
  68   }
  69   DNSQuestion(const DNSQuestion&) = delete;
  70   DNSQuestion& operator=(const DNSQuestion&) = delete;
  71   DNSQuestion(DNSQuestion&&) = default;
  72
  73 #ifdef HAVE_PROTOBUF
  74   boost::optional<boost::uuids::uuid> uniqueId;
  75 #endif
  76   Netmask ecs;
  77   boost::optional<Netmask> subnet;
  78   std::string sni; /* Server Name Indication, if any (DoT or DoH) */
  79   std::string poolname;
  80   const DNSName* qname{nullptr};
  81   const ComboAddress* local{nullptr};
  82   const ComboAddress* remote{nullptr};
  83   std::shared_ptr<QTag> qTag{nullptr};
  84   std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
  85   std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
  86   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
  87   struct dnsheader* dh{nullptr};
  88   const struct timespec* queryTime{nullptr};
  89   struct DOHUnit* du{nullptr};
  90   size_t size;
  91   unsigned int consumed{0};
  92   int delayMsec{0};
  93   boost::optional<uint32_t> tempFailureTTL;
  94   uint32_t cacheKeyNoECS;
  95   uint32_t cacheKey;
  96   const uint16_t qtype;
  97   const uint16_t qclass;
  98   uint16_t len;
  99   uint16_t ecsPrefixLength;
 100   uint16_t origFlags;
 101   uint8_t ednsRCode{0};
 102   const bool tcp;
 103   bool skipCache{false};
 104   bool ecsOverride;
 105   bool useECS{true};
 106   bool addXPF{true};
 107   bool ecsSet{false};
 108   bool ecsAdded{false};
 109   bool ednsAdded{false};
 110   bool useZeroScope{false};
 111   bool dnssecOK{false};
 112 };
 113
 114 struct DNSResponse : DNSQuestion
 115 {
 116   DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
 117     DNSQuestion(name, type, class_, consumed, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
 118   DNSResponse(const DNSResponse&) = delete;
 119   DNSResponse& operator=(const DNSResponse&) = delete;
 120   DNSResponse(DNSResponse&&) = default;
 121 };
 122
/* so what could you do:
   drop,
   fake up nxdomain,
   provide actual answer,
   allow and stop processing,
   continue processing,
   modify header:    (servfail|refused|notimp), set TC=1,
   send to pool */
 131
 132 class DNSAction
 133 {
 134 public:
 135   enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse };
 136   static std::string typeToString(const Action& action)
 137   {
 138     switch(action) {
 139     case Action::Drop:
 140       return "Drop";
 141     case Action::Nxdomain:
 142       return "Send NXDomain";
 143     case Action::Refused:
 144       return "Send Refused";
 145     case Action::Spoof:
 146       return "Spoof an answer";
 147     case Action::Allow:
 148       return "Allow";
 149     case Action::HeaderModify:
 150       return "Modify the header";
 151     case Action::Pool:
 152       return "Route to a pool";
 153     case Action::Delay:
 154       return "Delay";
 155     case Action::Truncate:
 156       return "Truncate over UDP";
 157     case Action::ServFail:
 158       return "Send ServFail";
 159     case Action::None:
 160     case Action::NoOp:
 161       return "Do nothing";
 162     case Action::NoRecurse:
 163       return "Set rd=0";
 164     }
 165
 166     return "Unknown";
 167   }
 168
 169   virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
 170   virtual ~DNSAction()
 171   {
 172   }
 173   virtual string toString() const = 0;
 174   virtual std::map<string, double> getStats() const
 175   {
 176     return {{}};
 177   }
 178 };
 179
 180 class DNSResponseAction
 181 {
 182 public:
 183   enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
 184   virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
 185   virtual ~DNSResponseAction()
 186   {
 187   }
 188   virtual string toString() const = 0;
 189 };
 190
 191 struct DynBlock
 192 {
 193   DynBlock(): action(DNSAction::Action::None), warning(false)
 194   {
 195   }
 196
 197   DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
 198   {
 199   }
 200
 201   DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
 202   {
 203     blocks.store(rhs.blocks);
 204   }
 205
 206   DynBlock& operator=(const DynBlock& rhs)
 207   {
 208     reason=rhs.reason;
 209     until=rhs.until;
 210     domain=rhs.domain;
 211     action=rhs.action;
 212     blocks.store(rhs.blocks);
 213     warning=rhs.warning;
 214     return *this;
 215   }
 216
 217   string reason;
 218   struct timespec until;
 219   DNSName domain;
 220   DNSAction::Action action;
 221   mutable std::atomic<unsigned int> blocks;
 222   bool warning;
 223 };
 224
 225 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
 226
 227 extern vector<pair<struct timeval, std::string> > g_confDelta;
 228
 229 extern uint64_t getLatencyCount(const std::string&);
 230
 231 struct DNSDistStats
 232 {
 233   using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
 234   stat_t responses{0};
 235   stat_t servfailResponses{0};
 236   stat_t queries{0};
 237   stat_t frontendNXDomain{0};
 238   stat_t frontendServFail{0};
 239   stat_t frontendNoError{0};
 240   stat_t nonCompliantQueries{0};
 241   stat_t nonCompliantResponses{0};
 242   stat_t rdQueries{0};
 243   stat_t emptyQueries{0};
 244   stat_t aclDrops{0};
 245   stat_t dynBlocked{0};
 246   stat_t ruleDrop{0};
 247   stat_t ruleNXDomain{0};
 248   stat_t ruleRefused{0};
 249   stat_t ruleServFail{0};
 250   stat_t selfAnswered{0};
 251   stat_t downstreamTimeouts{0};
 252   stat_t downstreamSendErrors{0};
 253   stat_t truncFail{0};
 254   stat_t noPolicy{0};
 255   stat_t cacheHits{0};
 256   stat_t cacheMisses{0};
 257   stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
 258   stat_t securityStatus{0};
 259
 260   double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
 261   typedef std::function<uint64_t(const std::string&)> statfunction_t;
 262   typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
 263   std::vector<std::pair<std::string, entry_t>> entries{
 264     {"responses", &responses},
 265     {"servfail-responses", &servfailResponses},
 266     {"queries", &queries},
 267     {"frontend-nxdomain", &frontendNXDomain},
 268     {"frontend-servfail", &frontendServFail},
 269     {"frontend-noerror", &frontendNoError},
 270     {"acl-drops", &aclDrops},
 271     {"rule-drop", &ruleDrop},
 272     {"rule-nxdomain", &ruleNXDomain},
 273     {"rule-refused", &ruleRefused},
 274     {"rule-servfail", &ruleServFail},
 275     {"self-answered", &selfAnswered},
 276     {"downstream-timeouts", &downstreamTimeouts},
 277     {"downstream-send-errors", &downstreamSendErrors},
 278     {"trunc-failures", &truncFail},
 279     {"no-policy", &noPolicy},
 280     {"latency0-1", &latency0_1},
 281     {"latency1-10", &latency1_10},
 282     {"latency10-50", &latency10_50},
 283     {"latency50-100", &latency50_100},
 284     {"latency100-1000", &latency100_1000},
 285     {"latency-slow", &latencySlow},
 286     {"latency-avg100", &latencyAvg100},
 287     {"latency-avg1000", &latencyAvg1000},
 288     {"latency-avg10000", &latencyAvg10000},
 289     {"latency-avg1000000", &latencyAvg1000000},
 290     {"uptime", uptimeOfProcess},
 291     {"real-memory-usage", getRealMemoryUsage},
 292     {"special-memory-usage", getSpecialMemoryUsage},
 293     {"noncompliant-queries", &nonCompliantQueries},
 294     {"noncompliant-responses", &nonCompliantResponses},
 295     {"rdqueries", &rdQueries},
 296     {"empty-queries", &emptyQueries},
 297     {"cache-hits", &cacheHits},
 298     {"cache-misses", &cacheMisses},
 299     {"cpu-user-msec", getCPUTimeUser},
 300     {"cpu-sys-msec", getCPUTimeSystem},
 301     {"fd-usage", getOpenFileDescriptors},
 302     {"dyn-blocked", &dynBlocked},
 303     {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
 304     {"security-status", &securityStatus},
 305     // Latency histogram
 306     {"latency-sum", &latencySum},
 307     {"latency-count", getLatencyCount},
 308   };
 309 };
 310
 311 // Metric types for Prometheus
 312 enum class PrometheusMetricType: int {
 313     counter = 1,
 314     gauge = 2
 315 };
 316
 317 // Keeps additional information about metrics
 318 struct MetricDefinition {
 319   MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
 320   }
 321
 322   MetricDefinition() = default;
 323
 324   // Metric description
 325   std::string description;
 326   // Metric type for Prometheus
 327   PrometheusMetricType prometheusType;
 328 };
 329
 330 struct MetricDefinitionStorage {
 331   // Return metric definition by name
 332   bool getMetricDetails(std::string metricName, MetricDefinition& metric) {
 333   auto metricDetailsIter = metrics.find(metricName);
 334
 335   if (metricDetailsIter == metrics.end()) {
 336     return false;
 337   }
 338
 339   metric = metricDetailsIter->second;
 340     return true;
 341   };
 342
 343   // Return string representation of Prometheus metric type
 344   std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
 345     switch (metricType) {
 346       case PrometheusMetricType::counter:
 347         return "counter";
 348         break;
 349       case PrometheusMetricType::gauge:
 350         return "gauge";
 351         break;
 352       default:
 353         return "";
 354         break;
 355     }
 356   };
 357
 358   std::map<std::string, MetricDefinition> metrics = {
 359     { "responses",              MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
 360     { "servfail-responses",     MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
 361     { "queries",                MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
 362     { "frontend-nxdomain",      MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
 363     { "frontend-servfail",      MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
 364     { "frontend-noerror",       MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
 365     { "acl-drops",              MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
 366     { "rule-drop",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
 367     { "rule-nxdomain",          MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
 368     { "rule-refused",           MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
 369     { "rule-servfail",          MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
 370     { "self-answered",          MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
 371     { "downstream-timeouts",    MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
 372     { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
 373     { "trunc-failures",         MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
 374     { "no-policy",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
 375     { "latency0-1",             MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
 376     { "latency1-10",            MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
 377     { "latency10-50",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
 378     { "latency50-100",          MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
 379     { "latency100-1000",        MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
 380     { "latency-slow",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
 381     { "latency-avg100",         MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 100 packets")},
 382     { "latency-avg1000",        MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000 packets")},
 383     { "latency-avg10000",       MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 10000 packets")},
 384     { "latency-avg1000000",     MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000000 packets")},
 385     { "uptime",                 MetricDefinition(PrometheusMetricType::gauge,   "Uptime of the dnsdist process in seconds")},
 386     { "real-memory-usage",      MetricDefinition(PrometheusMetricType::gauge,   "Current memory usage in bytes")},
 387     { "noncompliant-queries",   MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
 388     { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
 389     { "rdqueries",              MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
 390     { "empty-queries",          MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
 391     { "cache-hits",             MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
 392     { "cache-misses",           MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
 393     { "cpu-user-msec",          MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
 394     { "cpu-sys-msec",           MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
 395     { "fd-usage",               MetricDefinition(PrometheusMetricType::gauge,   "Number of currently used file descriptors")},
 396     { "dyn-blocked",            MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
 397     { "dyn-block-nmg-size",     MetricDefinition(PrometheusMetricType::gauge,   "Number of dynamic blocks entries") },
 398     { "security-status",        MetricDefinition(PrometheusMetricType::gauge,   "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
 399     // Latency histogram
 400     { "latency-sum",            MetricDefinition(PrometheusMetricType::counter, "Total response time in milliseconds")},
 401     { "latency-count",          MetricDefinition(PrometheusMetricType::counter, "Number of queries contributing to response time histogram")},
 402   };
 403 };
 404
 405 extern MetricDefinitionStorage g_metricDefinitions;
 406 extern struct DNSDistStats g_stats;
 407 void doLatencyStats(double udiff);
 408
 409
 410 struct StopWatch
 411 {
 412   StopWatch(bool realTime=false): d_needRealTime(realTime)
 413   {
 414   }
 415   struct timespec d_start{0,0};
 416   bool d_needRealTime{false};
 417
 418   void start() {
 419     if(gettime(&d_start, d_needRealTime) < 0)
 420       unixDie("Getting timestamp");
 421
 422   }
 423
 424   void set(const struct timespec& from) {
 425     d_start = from;
 426   }
 427
 428   double udiff() const {
 429     struct timespec now;
 430     if(gettime(&now, d_needRealTime) < 0)
 431       unixDie("Getting timestamp");
 432
 433     return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
 434   }
 435
 436   double udiffAndSet() {
 437     struct timespec now;
 438     if(gettime(&now, d_needRealTime) < 0)
 439       unixDie("Getting timestamp");
 440
 441     auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
 442     d_start = now;
 443     return ret;
 444   }
 445
 446 };
 447
 448 class BasicQPSLimiter
 449 {
 450 public:
 451   BasicQPSLimiter()
 452   {
 453   }
 454
 455   BasicQPSLimiter(unsigned int burst): d_tokens(burst)
 456   {
 457     d_prev.start();
 458   }
 459
 460   bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
 461   {
 462     auto delta = d_prev.udiffAndSet();
 463
 464     if(delta > 0.0) // time, frequently, does go backwards..
 465       d_tokens += 1.0 * rate * (delta/1000000.0);
 466
 467     if(d_tokens > burst) {
 468       d_tokens = burst;
 469     }
 470
 471     bool ret=false;
 472     if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
 473       ret=true;
 474       --d_tokens;
 475     }
 476
 477     return ret;
 478   }
 479
 480   bool seenSince(const struct timespec& cutOff) const
 481   {
 482     return cutOff < d_prev.d_start;
 483   }
 484
 485 protected:
 486   mutable StopWatch d_prev;
 487   mutable double d_tokens;
 488 };
 489
 490 class QPSLimiter : public BasicQPSLimiter
 491 {
 492 public:
 493   QPSLimiter(): BasicQPSLimiter()
 494   {
 495   }
 496
 497   QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
 498   {
 499     d_prev.start();
 500   }
 501
 502   unsigned int getRate() const
 503   {
 504     return d_passthrough ? 0 : d_rate;
 505   }
 506
 507   int getPassed() const
 508   {
 509     return d_passed;
 510   }
 511
 512   int getBlocked() const
 513   {
 514     return d_blocked;
 515   }
 516
 517   bool check() const // this is not quite fair
 518   {
 519     if (d_passthrough) {
 520       return true;
 521     }
 522
 523     bool ret = BasicQPSLimiter::check(d_rate, d_burst);
 524     if (ret) {
 525       d_passed++;
 526     }
 527     else {
 528       d_blocked++;
 529     }
 530
 531     return ret;
 532   }
 533 private:
 534   mutable unsigned int d_passed{0};
 535   mutable unsigned int d_blocked{0};
 536   unsigned int d_rate;
 537   unsigned int d_burst;
 538   bool d_passthrough{true};
 539 };
 540
 541 struct ClientState;
 542
 543 struct IDState
 544 {
 545   IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
 546   IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
 547   {
 548     usageIndicator.store(orig.usageIndicator.load());
 549     origFD = orig.origFD;
 550     origID = orig.origID;
 551     delayMsec = orig.delayMsec;
 552     tempFailureTTL = orig.tempFailureTTL;
 553   }
 554
 555   static const int64_t unusedIndicator = -1;
 556
 557   static bool isInUse(int64_t usageIndicator)
 558   {
 559     return usageIndicator != unusedIndicator;
 560   }
 561
 562   bool isInUse() const
 563   {
 564     return usageIndicator != unusedIndicator;
 565   }
 566
 567   /* return true if the value has been successfully replaced meaning that
 568      no-one updated the usage indicator in the meantime */
 569   bool tryMarkUnused(int64_t expectedUsageIndicator)
 570   {
 571     return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
 572   }
 573
 574   /* mark as unused no matter what, return true if the state was in use before */
 575   bool markAsUsed()
 576   {
 577     auto currentGeneration = generation++;
 578     return markAsUsed(currentGeneration);
 579   }
 580
 581   /* mark as unused no matter what, return true if the state was in use before */
 582   bool markAsUsed(int64_t currentGeneration)
 583   {
 584     int64_t oldUsage = usageIndicator.exchange(currentGeneration);
 585     return oldUsage != unusedIndicator;
 586   }
 587
 588   /* We use this value to detect whether this state is in use.
 589      For performance reasons we don't want to use a lock here, but that means
 590      we need to be very careful when modifying this value. Modifications happen
 591      from:
 592      - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
 593        then picking one of the states associated to this backend (via the idOffset).
 594        Most of the time this state should not be in use and usageIndicator is -1, but we
 595        might not yet have received a response for the query previously associated to this
 596        state, meaning that we will 'reuse' this state and erase the existing state.
 597        If we ever receive a response for this state, it will be discarded. This is
 598        mostly fine for UDP except that we still need to be careful in order to miss
 599        the 'outstanding' counters, which should only be increased when we are picking
 600        an empty state, and not when reusing ;
 601        For DoH, though, we have dynamically allocated a DOHUnit object that needs to
 602        be freed, as well as internal objects internals to libh2o.
 603      - one of the UDP receiver threads receiving a response from a backend, picking
 604        the corresponding state and sending the response to the client ;
 605      - the 'healthcheck' thread scanning the states to actively discover timeouts,
 606        mostly to keep some counters like the 'outstanding' one sane.
 607      We previously based that logic on the origFD (FD on which the query was received,
 608      and therefore from where the response should be sent) but this suffered from an
 609      ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
 610      same value since we only have so much incoming sockets:
 611      - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
 612      - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
 613        qtype and qclass match
 614      - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
 615      - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
 616        5, except it's not the same 5 anymore and it overrides a fresh state.
 617      We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
 618      wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
 619      when the state is unused and the value of our counter otherwise.
 620   */
 621   std::atomic<int64_t> usageIndicator{unusedIndicator};  // set to unusedIndicator to indicate this state is empty   // 8
 622   std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue    // 4
 623   ComboAddress origRemote;                                    // 28
 624   ComboAddress origDest;                                      // 28
 625   StopWatch sentTime;                                         // 16
 626   DNSName qname;                                              // 80
 627   std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
 628 #ifdef HAVE_PROTOBUF
 629   boost::optional<boost::uuids::uuid> uniqueId;
 630 #endif
 631   boost::optional<Netmask> subnet{boost::none};
 632   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
 633   std::shared_ptr<QTag> qTag{nullptr};
 634   const ClientState* cs{nullptr};
 635   DOHUnit* du{nullptr};
 636   uint32_t cacheKey;                                          // 4
 637   uint32_t cacheKeyNoECS;                                     // 4
 638   uint16_t age;                                               // 4
 639   uint16_t qtype;                                             // 2
 640   uint16_t qclass;                                            // 2
 641   uint16_t origID;                                            // 2
 642   uint16_t origFlags;                                         // 2
 643   int origFD{-1};
 644   int delayMsec;
 645   boost::optional<uint32_t> tempFailureTTL;
 646   bool ednsAdded{false};
 647   bool ecsAdded{false};
 648   bool skipCache{false};
 649   bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
 650   bool dnssecOK{false};
 651   bool useZeroScope;
 652 };
 653
 654 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
 655 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
 656 struct QueryCount {
 657   QueryCount()
 658   {
 659     pthread_rwlock_init(&queryLock, nullptr);
 660   }
 661   QueryCountRecords records;
 662   QueryCountFilter filter;
 663   pthread_rwlock_t queryLock;
 664   bool enabled{false};
 665 };
 666
 667 extern QueryCount g_qcount;
 668
 669 struct ClientState
 670 {
 671   ClientState(const ComboAddress& local_, bool isTCP, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP), reuseport(doReusePort)
 672   {
 673   }
 674
 675   std::set<int> cpus;
 676   ComboAddress local;
 677   std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
 678   std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
 679   std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
 680   std::string interface;
 681   std::atomic<uint64_t> queries{0};
 682   std::atomic<uint64_t> tcpDiedReadingQuery{0};
 683   std::atomic<uint64_t> tcpDiedSendingResponse{0};
 684   std::atomic<uint64_t> tcpGaveUp{0};
 685   std::atomic<uint64_t> tcpClientTimeouts{0};
 686   std::atomic<uint64_t> tcpDownstreamTimeouts{0};
 687   std::atomic<uint64_t> tcpCurrentConnections{0};
 688   std::atomic<double> tcpAvgQueriesPerConnection{0.0};
 689   /* in ms */
 690   std::atomic<double> tcpAvgConnectionDuration{0.0};
 691   int udpFD{-1};
 692   int tcpFD{-1};
 693   int fastOpenQueueSize{0};
 694   bool muted{false};
 695   bool tcp;
 696   bool reuseport;
 697   bool ready{false};
 698
 699   int getSocket() const
 700   {
 701     return udpFD != -1 ? udpFD : tcpFD;
 702   }
 703
 704   std::string getType() const
 705   {
 706     std::string result = udpFD != -1 ? "UDP" : "TCP";
 707
 708     if (dohFrontend) {
 709       result += " (DNS over HTTPS)";
 710     }
 711     else if (tlsFrontend) {
 712       result += " (DNS over TLS)";
 713     }
 714     else if (dnscryptCtx) {
 715       result += " (DNSCrypt)";
 716     }
 717
 718     return result;
 719   }
 720
 721 #ifdef HAVE_EBPF
 722   shared_ptr<BPFFilter> d_filter;
 723
 724   void detachFilter()
 725   {
 726     if (d_filter) {
 727       d_filter->removeSocket(getSocket());
 728       d_filter = nullptr;
 729     }
 730   }
 731
 732   void attachFilter(shared_ptr<BPFFilter> bpf)
 733   {
 734     detachFilter();
 735
 736     bpf->addSocket(getSocket());
 737     d_filter = bpf;
 738   }
 739 #endif /* HAVE_EBPF */
 740
 741   void updateTCPMetrics(size_t queries, uint64_t durationMs)
 742   {
 743     tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
 744     tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
 745   }
 746 };
 747
 748 class TCPClientCollection {
 749   std::vector<int> d_tcpclientthreads;
 750   std::atomic<uint64_t> d_numthreads{0};
 751   std::atomic<uint64_t> d_pos{0};
 752   std::atomic<uint64_t> d_queued{0};
 753   const uint64_t d_maxthreads{0};
 754   std::mutex d_mutex;
 755   int d_singlePipe[2];
 756   const bool d_useSinglePipe;
 757 public:
 758
 759   TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
 760
 761   {
 762     d_tcpclientthreads.reserve(maxThreads);
 763
 764     if (d_useSinglePipe) {
 765       if (pipe(d_singlePipe) < 0) {
 766         int err = errno;
 767         throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
 768       }
 769
 770       if (!setNonBlocking(d_singlePipe[0])) {
 771         int err = errno;
 772         close(d_singlePipe[0]);
 773         close(d_singlePipe[1]);
 774         throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
 775       }
 776
 777       if (!setNonBlocking(d_singlePipe[1])) {
 778         int err = errno;
 779         close(d_singlePipe[0]);
 780         close(d_singlePipe[1]);
 781         throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
 782       }
 783     }
 784   }
 785   int getThread()
 786   {
 787     uint64_t pos = d_pos++;
 788     ++d_queued;
 789     return d_tcpclientthreads[pos % d_numthreads];
 790   }
 791   bool hasReachedMaxThreads() const
 792   {
 793     return d_numthreads >= d_maxthreads;
 794   }
 795   uint64_t getThreadsCount() const
 796   {
 797     return d_numthreads;
 798   }
 799   uint64_t getQueuedCount() const
 800   {
 801     return d_queued;
 802   }
 803   void decrementQueuedCount()
 804   {
 805     --d_queued;
 806   }
 807   void addTCPClientThread();
 808 };
 809
/* Global collection of TCP worker threads; only declared here, the definition
   lives in another translation unit. */
extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
 811
 812 struct DownstreamState
 813 {
 814    typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
 815
 816   DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, size_t numberOfSockets);
 817   DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, 1) {}
 818   ~DownstreamState()
 819   {
 820     for (auto& fd : sockets) {
 821       if (fd >= 0) {
 822         close(fd);
 823         fd = -1;
 824       }
 825     }
 826   }
 827   boost::uuids::uuid id;
 828   std::set<unsigned int> hashes;
 829   mutable pthread_rwlock_t d_lock;
 830   std::vector<int> sockets;
 831   std::mutex socketsLock;
 832   std::mutex connectLock;
 833   std::unique_ptr<FDMultiplexer> mplexer{nullptr};
 834   std::thread tid;
 835   const ComboAddress remote;
 836   QPSLimiter qps;
 837   vector<IDState> idStates;
 838   const ComboAddress sourceAddr;
 839   checkfunc_t checkFunction;
 840   DNSName checkName{"a.root-servers.net."};
 841   QType checkType{QType::A};
 842   uint16_t checkClass{QClass::IN};
 843   std::atomic<uint64_t> idOffset{0};
 844   std::atomic<uint64_t> sendErrors{0};
 845   std::atomic<uint64_t> outstanding{0};
 846   std::atomic<uint64_t> reuseds{0};
 847   std::atomic<uint64_t> queries{0};
 848   struct {
 849     std::atomic<uint64_t> sendErrors{0};
 850     std::atomic<uint64_t> reuseds{0};
 851     std::atomic<uint64_t> queries{0};
 852   } prev;
 853   std::atomic<uint64_t> tcpDiedSendingQuery{0};
 854   std::atomic<uint64_t> tcpDiedReadingResponse{0};
 855   std::atomic<uint64_t> tcpGaveUp{0};
 856   std::atomic<uint64_t> tcpReadTimeouts{0};
 857   std::atomic<uint64_t> tcpWriteTimeouts{0};
 858   std::atomic<uint64_t> tcpCurrentConnections{0};
 859   std::atomic<double> tcpAvgQueriesPerConnection{0.0};
 860   /* in ms */
 861   std::atomic<double> tcpAvgConnectionDuration{0.0};
 862   string name;
 863   size_t socketsOffset{0};
 864   double queryLoad{0.0};
 865   double dropRate{0.0};
 866   double latencyUsec{0.0};
 867   int order{1};
 868   int weight{1};
 869   int tcpConnectTimeout{5};
 870   int tcpRecvTimeout{30};
 871   int tcpSendTimeout{30};
 872   unsigned int checkInterval{1};
 873   unsigned int lastCheck{0};
 874   const unsigned int sourceItf{0};
 875   uint16_t retries{5};
 876   uint16_t xpfRRCode{0};
 877   uint16_t checkTimeout{1000}; /* in milliseconds */
 878   uint8_t currentCheckFailures{0};
 879   uint8_t consecutiveSuccessfulChecks{0};
 880   uint8_t maxCheckFailures{1};
 881   uint8_t minRiseSuccesses{1};
 882   StopWatch sw;
 883   set<string> pools;
 884   enum class Availability { Up, Down, Auto} availability{Availability::Auto};
 885   bool mustResolve{false};
 886   bool upStatus{false};
 887   bool useECS{false};
 888   bool setCD{false};
 889   bool disableZeroScope{false};
 890   std::atomic<bool> connected{false};
 891   std::atomic_flag threadStarted;
 892   bool tcpFastOpen{false};
 893   bool ipBindAddrNoPort{true};
 894
 895   bool isUp() const
 896   {
 897     if(availability == Availability::Down)
 898       return false;
 899     if(availability == Availability::Up)
 900       return true;
 901     return upStatus;
 902   }
 903   void setUp() { availability = Availability::Up; }
 904   void setDown() { availability = Availability::Down; }
 905   void setAuto() { availability = Availability::Auto; }
 906   string getName() const {
 907     if (name.empty()) {
 908       return remote.toStringWithPort();
 909     }
 910     return name;
 911   }
 912   string getNameWithAddr() const {
 913     if (name.empty()) {
 914       return remote.toStringWithPort();
 915     }
 916     return name + " (" + remote.toStringWithPort()+ ")";
 917   }
 918   string getStatus() const
 919   {
 920     string status;
 921     if(availability == DownstreamState::Availability::Up)
 922       status = "UP";
 923     else if(availability == DownstreamState::Availability::Down)
 924       status = "DOWN";
 925     else
 926       status = (upStatus ? "up" : "down");
 927     return status;
 928   }
 929   bool reconnect();
 930   void hash();
 931   void setId(const boost::uuids::uuid& newId);
 932   void setWeight(int newWeight);
 933
 934   void updateTCPMetrics(size_t queries, uint64_t durationMs)
 935   {
 936     tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (queries / 100.0);
 937     tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
 938   }
 939 };
/* List of shared handles to backend servers */
using servers_t =vector<std::shared_ptr<DownstreamState>>;

/* List of (number, value) pairs */
template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;

/* Defined elsewhere; NOTE(review): presumably the entry point of the thread
   handling the responses of the given backend -- confirm against the definition */
void responderThread(std::shared_ptr<DownstreamState> state);
/* Global Lua context and the mutex associated with it (see the note on g_outputBuffer) */
extern std::mutex g_luamutex;
extern LuaContext g_lua;
extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
 948
 949 class DNSRule
 950 {
 951 public:
 952   virtual ~DNSRule ()
 953   {
 954   }
 955   virtual bool matches(const DNSQuestion* dq) const =0;
 956   virtual string toString() const = 0;
 957   mutable std::atomic<uint64_t> d_matches{0};
 958 };
 959
 960 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
 961 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
 962
 963 struct ServerPolicy
 964 {
 965   string name;
 966   policyfunc_t policy;
 967   bool isLua;
 968   std::string toString() const {
 969     return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
 970   }
 971 };
 972
 973 struct ServerPool
 974 {
 975   ServerPool()
 976   {
 977     pthread_rwlock_init(&d_lock, nullptr);
 978   }
 979
 980   const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
 981
 982   bool getECS() const
 983   {
 984     return d_useECS;
 985   }
 986
 987   void setECS(bool useECS)
 988   {
 989     d_useECS = useECS;
 990   }
 991
 992   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
 993   std::shared_ptr<ServerPolicy> policy{nullptr};
 994
 995   size_t countServers(bool upOnly)
 996   {
 997     size_t count = 0;
 998     ReadLock rl(&d_lock);
 999     for (const auto& server : d_servers) {
1000       if (!upOnly || std::get<1>(server)->isUp() ) {
1001         count++;
1002       }
1003     }
1004     return count;
1005   }
1006
1007   NumberedVector<shared_ptr<DownstreamState>> getServers()
1008   {
1009     NumberedVector<shared_ptr<DownstreamState>> result;
1010     {
1011       ReadLock rl(&d_lock);
1012       result = d_servers;
1013     }
1014     return result;
1015   }
1016
1017   void addServer(shared_ptr<DownstreamState>& server)
1018   {
1019     WriteLock wl(&d_lock);
1020     unsigned int count = (unsigned int) d_servers.size();
1021     d_servers.push_back(make_pair(++count, server));
1022     /* we need to reorder based on the server 'order' */
1023     std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1024       return a.second->order < b.second->order;
1025     });
1026     /* and now we need to renumber for Lua (custom policies) */
1027     size_t idx = 1;
1028     for (auto& serv : d_servers) {
1029       serv.first = idx++;
1030     }
1031   }
1032
1033   void removeServer(shared_ptr<DownstreamState>& server)
1034   {
1035     WriteLock wl(&d_lock);
1036     size_t idx = 1;
1037     bool found = false;
1038     for (auto it = d_servers.begin(); it != d_servers.end();) {
1039       if (found) {
1040         /* we need to renumber the servers placed
1041            after the removed one, for Lua (custom policies) */
1042         it->first = idx++;
1043         it++;
1044       }
1045       else if (it->second == server) {
1046         it = d_servers.erase(it);
1047         found = true;
1048       } else {
1049         idx++;
1050         it++;
1051       }
1052     }
1053   }
1054
1055 private:
1056   NumberedVector<shared_ptr<DownstreamState>> d_servers;
1057   pthread_rwlock_t d_lock;
1058   bool d_useECS{false};
1059 };
/* Pools indexed by a string key (the pool name, going by the helpers below) */
using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
/* Helpers operating on the pool called 'poolName' in 'pools'; defined elsewhere.
   NOTE(review): what happens when that pool does not exist is not visible from here. */
void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1064
/* Settings of one carbon export target (see g_carbon and carbonDumpThread()).
   Plain aggregate: none of the members has a default value, so 'interval' is
   indeterminate unless explicitly set. */
struct CarbonConfig
{
  ComboAddress server;
  /* NOTE(review): the three names below are presumably combined to build the
     metric names -- confirm against carbonDumpThread() */
  std::string namespace_name;
  std::string ourname;
  std::string instance_name;
  /* NOTE(review): presumably the delay between two exports, in seconds -- confirm */
  unsigned int interval;
};
1073
/* Flag values; NOTE(review): presumably meant for the 16-bit flags field of
   the EDNS OPT record -- confirm against the users of this enum */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0,
  /* 32768 is 0x8000, the most significant bit of a 16-bit value; "DO"
     presumably stands for the EDNS "DNSSEC OK" bit */
  EDNS_HEADER_FLAG_DO = 32768
};
1078
/* Associates a rule with an action applying to queries, along with an
   identifier and a creation order. Plain aggregate, no default values. */
struct DNSDistRuleAction
{
  std::shared_ptr<DNSRule> d_rule;
  std::shared_ptr<DNSAction> d_action;
  boost::uuids::uuid d_id;
  uint64_t d_creationOrder;
};
1086
/* Associates a rule with an action applying to responses, along with an
   identifier and a creation order. Plain aggregate, no default values. */
struct DNSDistResponseRuleAction
{
  std::shared_ptr<DNSRule> d_rule;
  std::shared_ptr<DNSResponseAction> d_action;
  boost::uuids::uuid d_id;
  uint64_t d_creationOrder;
};
1094
/* Global state, only declared here (extern); the definitions live in another
   translation unit. Most of it is wrapped in a GlobalStateHolder, from which
   LocalHolders (below) obtains local views with getLocal(). */
extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
extern DNSAction::Action g_dynBlockAction;

extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
extern GlobalStateHolder<ServerPolicy> g_policy;
extern GlobalStateHolder<servers_t> g_dstates;
extern GlobalStateHolder<pools_t> g_pools;
/* rules for queries, then three separate lists of rules for responses */
extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
extern GlobalStateHolder<NetmaskGroup> g_ACL;
1107
/* Global settings and frontend lists, only declared here (extern); the
   definitions live in another translation unit. Apart from the std::atomic
   ones, these are plain variables without any synchronisation of their own. */
extern ComboAddress g_serverControl; // not changed during runtime

/* NOTE(review): the meaning of the individual tuple fields is not visible from this declaration */
extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
extern std::vector<std::unique_ptr<ClientState>> g_frontends;
extern bool g_truncateTC;
extern bool g_fixupCase;
/* NOTE(review): the units of the three timeouts below are not visible from here */
extern int g_tcpRecvTimeout;
extern int g_tcpSendTimeout;
extern int g_udpTimeout;
extern uint16_t g_maxOutstanding;
extern std::atomic<bool> g_configurationDone;
extern uint64_t g_maxTCPClientThreads;
extern uint64_t g_maxTCPQueuedConnections;
extern size_t g_maxTCPQueriesPerConn;
extern size_t g_maxTCPConnectionDuration;
extern size_t g_maxTCPConnectionsPerClient;
extern std::atomic<uint16_t> g_cacheCleaningDelay;
extern std::atomic<uint16_t> g_cacheCleaningPercentage;
extern bool g_verboseHealthChecks;
extern uint32_t g_staleCacheEntriesTTL;
extern bool g_apiReadWrite;
extern std::string g_apiConfigDirectory;
extern bool g_servFailOnNoPolicy;
extern uint32_t g_hashperturb;
extern bool g_useTCPSinglePipe;
extern uint16_t g_downstreamTCPCleanupInterval;
extern size_t g_udpVectorSize;
extern bool g_preserveTrailingData;
extern bool g_allowEmptyResponse;
extern bool g_roundrobinFailOnNoServer;

/* only available when built with eBPF support */
#ifdef HAVE_EBPF
extern shared_ptr<BPFFilter> g_defaultBPFFilter;
extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
#endif /* HAVE_EBPF */
1145
1146 struct LocalHolders
1147 {
1148   LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1149   {
1150   }
1151
1152   LocalStateHolder<NetmaskGroup> acl;
1153   LocalStateHolder<ServerPolicy> policy;
1154   LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1155   LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1156   LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1157   LocalStateHolder<servers_t> servers;
1158   LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1159   LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1160   LocalStateHolder<pools_t> pools;
1161 };
1162
struct dnsheader;

/* All the functions below are defined elsewhere. */
void controlThread(int fd, ComboAddress local);
vector<std::function<void(void)>> setupLua(bool client, const std::string& config);
/* Pool lookups by name; going by its name, createPoolIfNotExists() adds the
   pool to 'pools' when it is missing, which is why it takes a non-const map */
std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);

/* Server selection functions, all sharing the signature described by policyfunc_t */
std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);

std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
1178
/* Settings of the built-in webserver, bundled with a mutex. */
struct WebserverConfig
{
  std::string password;
  std::string apiKey;
  /* optional: may hold no map at all */
  boost::optional<std::map<std::string, std::string> > customHeaders;
  /* NOTE(review): presumably protects the members above -- confirm against the setters */
  std::mutex lock;
};
1186
/* Setters for the webserver settings; defined elsewhere. The API key and the
   custom headers are optional values. */
void setWebserverAPIKey(const boost::optional<std::string> apiKey);
void setWebserverPassword(const std::string& password);
void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);

/* Defined elsewhere; NOTE(review): going by their names these are thread entry points -- confirm */
void dnsdistWebserverThread(int sock, const ComboAddress& local);
void tcpAcceptorThread(void* p);
/* only declared when built with DNS over HTTPS support */
#ifdef HAVE_DNS_OVER_HTTPS
void dohThread(ClientState* cs);
#endif /* HAVE_DNS_OVER_HTTPS */
1196
/* Tracking of whether the Lua code that was run had side effects; defined elsewhere */
void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
void setLuaSideEffect();   // set to report a side effect, cancelling all _no_ side effect calls
bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
void resetLuaSideEffect(); // reset to indeterminate state

/* Response and rule processing helpers; defined elsewhere. NOTE(review): the
   meaning of the boolean results is not visible from these declarations. */
bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);

bool checkQueryHeaders(const struct dnsheader* dh);

/* DNSCrypt contexts and helpers; defined elsewhere */
extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);

bool addXPF(DNSQuestion& dq, uint16_t optionCode);

uint16_t getRandomDNSID();
1215
1216 #include "dnsdist-snmp.hh"
1217
/* SNMP-related globals and one EDNS setting, only declared here (extern) */
extern bool g_snmpEnabled;
extern bool g_snmpTrapsEnabled;
extern DNSDistSNMPAgent* g_snmpAgent;
extern bool g_addEDNSToSelfGeneratedResponses;

/* 'static' in a header: every translation unit including it gets its own copy.
   NOTE(review): presumably a size in bytes -- confirm against its users */
static const size_t s_udpIncomingBufferSize{1500};

/* Possible outcomes of processQuery() */
enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
/* Defined elsewhere; 'selectedBackend' is passed by non-const reference,
   presumably so that it can receive the chosen backend -- confirm */
ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);

/* Conversions between an IDState and a question or response; defined elsewhere */
DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);

/* Backend socket helpers; defined elsewhere. 'healthCheck' defaults to false. */
int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);