pdns/dnsdist.hh

   1 /*
   2  * This file is part of PowerDNS or dnsdist.
   3  * Copyright -- PowerDNS.COM B.V. and its contributors
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of version 2 of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * In addition, for the avoidance of any doubt, permission is granted to
  10  * link this program with OpenSSL and to (re)distribute the binaries
  11  * produced as the result of such linking.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22 #pragma once
  23 #include "config.h"
  24 #include "ext/luawrapper/include/LuaContext.hpp"
  25
  26 #include <atomic>
  27 #include <mutex>
  28 #include <string>
  29 #include <thread>
  30 #include <time.h>
  31 #include <unistd.h>
  32 #include <unordered_map>
  33
  34 #include <boost/variant.hpp>
  35
  36 #include "bpf-filter.hh"
  37 #include "capabilities.hh"
  38 #include "circular_buffer.hh"
  39 #include "dnscrypt.hh"
  40 #include "dnsdist-cache.hh"
  41 #include "dnsdist-dynbpf.hh"
  42 #include "dnsname.hh"
  43 #include "doh.hh"
  44 #include "ednsoptions.hh"
  45 #include "gettime.hh"
  46 #include "iputils.hh"
  47 #include "misc.hh"
  48 #include "mplexer.hh"
  49 #include "sholder.hh"
  50 #include "tcpiohandler.hh"
  51 #include "uuid-utils.hh"
  52
  // Thread entry point, defined elsewhere (presumably the Carbon metrics exporter -- confirm).
  void carbonDumpThread();
  // Callback registered under the "uptime" name in DNSDistStats::entries.
  uint64_t uptimeOfProcess(const std::string& str);

  // Process-wide EDNS Client Subnet defaults; every new DNSQuestion copies them.
  extern uint16_t g_ECSSourcePrefixV4;
  extern uint16_t g_ECSSourcePrefixV6;
  extern bool g_ECSOverride;

  // String key/value tags that can be attached to a query.
  using QTag = std::unordered_map<std::string, std::string>;
  61
  62 struct DNSQuestion
  63 {
  64   DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
  65     qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
  66     const uint16_t* flags = getFlagsFromDNSHeader(dh);
  67     origFlags = *flags;
  68   }
  69   DNSQuestion(const DNSQuestion&) = delete;
  70   DNSQuestion& operator=(const DNSQuestion&) = delete;
  71   DNSQuestion(DNSQuestion&&) = default;
  72
  73 #ifdef HAVE_PROTOBUF
  74   boost::optional<boost::uuids::uuid> uniqueId;
  75 #endif
  76   Netmask ecs;
  77   boost::optional<Netmask> subnet;
  78   std::string sni; /* Server Name Indication, if any (DoT or DoH) */
  79   std::string poolname;
  80   const DNSName* qname{nullptr};
  81   const ComboAddress* local{nullptr};
  82   const ComboAddress* remote{nullptr};
  83   std::shared_ptr<QTag> qTag{nullptr};
  84   std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
  85   std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
  86   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
  87   struct dnsheader* dh{nullptr};
  88   const struct timespec* queryTime{nullptr};
  89   struct DOHUnit* du{nullptr};
  90   size_t size;
  91   unsigned int consumed{0};
  92   int delayMsec{0};
  93   boost::optional<uint32_t> tempFailureTTL;
  94   uint32_t cacheKeyNoECS;
  95   uint32_t cacheKey;
  96   const uint16_t qtype;
  97   const uint16_t qclass;
  98   uint16_t len;
  99   uint16_t ecsPrefixLength;
 100   uint16_t origFlags;
 101   uint8_t ednsRCode{0};
 102   const bool tcp;
 103   bool skipCache{false};
 104   bool ecsOverride;
 105   bool useECS{true};
 106   bool addXPF{true};
 107   bool ecsSet{false};
 108   bool ecsAdded{false};
 109   bool ednsAdded{false};
 110   bool useZeroScope{false};
 111   bool dnssecOK{false};
 112 };
 113
 114 struct DNSResponse : DNSQuestion
 115 {
 116   DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
 117     DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
 118   DNSResponse(const DNSResponse&) = delete;
 119   DNSResponse& operator=(const DNSResponse&) = delete;
 120   DNSResponse(DNSResponse&&) = default;
 121 };
 122
 123 /* so what could you do:
 124    drop,
 125    fake up nxdomain,
 126    provide actual answer,
 127    allow and stop processing,
 128    continue processing,
 129    modify header:    (servfail|refused|notimp), set TC=1,
 130    send to pool */
 131
 /* Interface implemented by every action that a rule can apply to a query. */
 class DNSAction
 {
 public:
   /* What the caller should do with the query once the action has run. */
   enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse, SpoofRaw };

   /* Human-readable description of an Action value.
      Returns "Unknown" for a value that is not one of the enumerators. */
   static std::string typeToString(const Action& action)
   {
     switch(action) {
     case Action::Drop:
       return "Drop";
     case Action::Nxdomain:
       return "Send NXDomain";
     case Action::Refused:
       return "Send Refused";
     case Action::Spoof:
       return "Spoof an answer";
     case Action::SpoofRaw:
       return "Spoof an answer from raw bytes";
     case Action::Allow:
       return "Allow";
     case Action::HeaderModify:
       return "Modify the header";
     case Action::Pool:
       return "Route to a pool";
     case Action::Delay:
       return "Delay";
     case Action::Truncate:
       return "Truncate over UDP";
     case Action::ServFail:
       return "Send ServFail";
     case Action::None:
     case Action::NoOp:
       return "Do nothing";
     case Action::NoRecurse:
       return "Set rd=0";
     }

     return "Unknown";
   }

   /* Apply the action to the query; ruleresult is an output string whose
      meaning depends on the returned Action. */
   virtual Action operator()(struct DNSQuestion*, std::string* ruleresult) const = 0;
   virtual ~DNSAction() = default;
   /* Short textual description of the action. */
   virtual std::string toString() const = 0;
   /* Per-action statistics, none by default.
      This has to be `{}`: `{{}}` builds a map holding one value-initialized
      element, that is a bogus statistic with an empty name and a value of 0. */
   virtual std::map<std::string, double> getStats() const
   {
     return {};
   }
 };
 181
 182 class DNSResponseAction
 183 {
 184 public:
 185   enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
 186   virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
 187   virtual ~DNSResponseAction()
 188   {
 189   }
 190   virtual string toString() const = 0;
 191 };
 192
 193 struct DynBlock
 194 {
 195   DynBlock(): action(DNSAction::Action::None), warning(false)
 196   {
 197   }
 198
 199   DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
 200   {
 201   }
 202
 203   DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
 204   {
 205     blocks.store(rhs.blocks);
 206   }
 207
 208   DynBlock& operator=(const DynBlock& rhs)
 209   {
 210     reason=rhs.reason;
 211     until=rhs.until;
 212     domain=rhs.domain;
 213     action=rhs.action;
 214     blocks.store(rhs.blocks);
 215     warning=rhs.warning;
 216     return *this;
 217   }
 218
 219   string reason;
 220   struct timespec until;
 221   DNSName domain;
 222   DNSAction::Action action;
 223   mutable std::atomic<unsigned int> blocks;
 224   bool warning;
 225 };
 226
 227 extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;
 228
 229 extern vector<pair<struct timeval, std::string> > g_confDelta;
 230
 231 extern uint64_t getLatencyCount(const std::string&);
 232
 233 struct DNSDistStats
 234 {
 235   using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
 236   stat_t responses{0};
 237   stat_t servfailResponses{0};
 238   stat_t queries{0};
 239   stat_t frontendNXDomain{0};
 240   stat_t frontendServFail{0};
 241   stat_t frontendNoError{0};
 242   stat_t nonCompliantQueries{0};
 243   stat_t nonCompliantResponses{0};
 244   stat_t rdQueries{0};
 245   stat_t emptyQueries{0};
 246   stat_t aclDrops{0};
 247   stat_t dynBlocked{0};
 248   stat_t ruleDrop{0};
 249   stat_t ruleNXDomain{0};
 250   stat_t ruleRefused{0};
 251   stat_t ruleServFail{0};
 252   stat_t selfAnswered{0};
 253   stat_t downstreamTimeouts{0};
 254   stat_t downstreamSendErrors{0};
 255   stat_t truncFail{0};
 256   stat_t noPolicy{0};
 257   stat_t cacheHits{0};
 258   stat_t cacheMisses{0};
 259   stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
 260   stat_t securityStatus{0};
 261
 262   double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
 263   typedef std::function<uint64_t(const std::string&)> statfunction_t;
 264   typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
 265   std::vector<std::pair<std::string, entry_t>> entries{
 266     {"responses", &responses},
 267     {"servfail-responses", &servfailResponses},
 268     {"queries", &queries},
 269     {"frontend-nxdomain", &frontendNXDomain},
 270     {"frontend-servfail", &frontendServFail},
 271     {"frontend-noerror", &frontendNoError},
 272     {"acl-drops", &aclDrops},
 273     {"rule-drop", &ruleDrop},
 274     {"rule-nxdomain", &ruleNXDomain},
 275     {"rule-refused", &ruleRefused},
 276     {"rule-servfail", &ruleServFail},
 277     {"self-answered", &selfAnswered},
 278     {"downstream-timeouts", &downstreamTimeouts},
 279     {"downstream-send-errors", &downstreamSendErrors},
 280     {"trunc-failures", &truncFail},
 281     {"no-policy", &noPolicy},
 282     {"latency0-1", &latency0_1},
 283     {"latency1-10", &latency1_10},
 284     {"latency10-50", &latency10_50},
 285     {"latency50-100", &latency50_100},
 286     {"latency100-1000", &latency100_1000},
 287     {"latency-slow", &latencySlow},
 288     {"latency-avg100", &latencyAvg100},
 289     {"latency-avg1000", &latencyAvg1000},
 290     {"latency-avg10000", &latencyAvg10000},
 291     {"latency-avg1000000", &latencyAvg1000000},
 292     {"uptime", uptimeOfProcess},
 293     {"real-memory-usage", getRealMemoryUsage},
 294     {"special-memory-usage", getSpecialMemoryUsage},
 295     {"noncompliant-queries", &nonCompliantQueries},
 296     {"noncompliant-responses", &nonCompliantResponses},
 297     {"rdqueries", &rdQueries},
 298     {"empty-queries", &emptyQueries},
 299     {"cache-hits", &cacheHits},
 300     {"cache-misses", &cacheMisses},
 301     {"cpu-user-msec", getCPUTimeUser},
 302     {"cpu-sys-msec", getCPUTimeSystem},
 303     {"fd-usage", getOpenFileDescriptors},
 304     {"dyn-blocked", &dynBlocked},
 305     {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
 306     {"security-status", &securityStatus},
 307     // Latency histogram
 308     {"latency-sum", &latencySum},
 309     {"latency-count", getLatencyCount},
 310   };
 311 };
 312
 // Metric types for Prometheus
 enum class PrometheusMetricType: int {
     counter = 1,
     gauge = 2
 };

 // Keeps additional information about metrics
 struct MetricDefinition {
   MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
   }

   MetricDefinition() = default;

   // Metric description
   std::string description;
   // Metric type for Prometheus. A default value is provided so that a
   // default-constructed definition (typically the output argument of
   // getMetricDetails()) never holds an indeterminate value.
   PrometheusMetricType prometheusType{PrometheusMetricType::counter};
 };

 // Descriptions and Prometheus types of the exported metrics, by metric name
 struct MetricDefinitionStorage {
   // Look a metric definition up by name.
   // Returns true and fills `metric` when the name is known, otherwise
   // returns false and leaves `metric` untouched.
   bool getMetricDetails(const std::string& metricName, MetricDefinition& metric) const {
     const auto metricDetailsIter = metrics.find(metricName);

     if (metricDetailsIter == metrics.end()) {
       return false;
     }

     metric = metricDetailsIter->second;
     return true;
   }

   // Return string representation of Prometheus metric type,
   // or an empty string for a value that is not a known type
   std::string getPrometheusStringMetricType(PrometheusMetricType metricType) const {
     switch (metricType) {
       case PrometheusMetricType::counter:
         return "counter";
       case PrometheusMetricType::gauge:
         return "gauge";
       default:
         return "";
     }
   }

   std::map<std::string, MetricDefinition> metrics = {
     { "responses",              MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
     { "servfail-responses",     MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
     { "queries",                MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
     { "frontend-nxdomain",      MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
     { "frontend-servfail",      MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
     { "frontend-noerror",       MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
     { "acl-drops",              MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
     { "rule-drop",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
     { "rule-nxdomain",          MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
     { "rule-refused",           MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
     // these answers are returned to the client, like the NXDomain and Refused ones above
     { "rule-servfail",          MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers returned because of a rule")},
     { "self-answered",          MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
     { "downstream-timeouts",    MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
     { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
     { "trunc-failures",         MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
     { "no-policy",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
     { "latency0-1",             MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
     { "latency1-10",            MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
     { "latency10-50",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
     { "latency50-100",          MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
     { "latency100-1000",        MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
     { "latency-slow",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
     { "latency-avg100",         MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 100 packets")},
     { "latency-avg1000",        MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000 packets")},
     { "latency-avg10000",       MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 10000 packets")},
     { "latency-avg1000000",     MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000000 packets")},
     { "uptime",                 MetricDefinition(PrometheusMetricType::gauge,   "Uptime of the dnsdist process in seconds")},
     { "real-memory-usage",      MetricDefinition(PrometheusMetricType::gauge,   "Current memory usage in bytes")},
     { "noncompliant-queries",   MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
     { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
     { "rdqueries",              MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
     { "empty-queries",          MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
     { "cache-hits",             MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
     { "cache-misses",           MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
     { "cpu-user-msec",          MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
     { "cpu-sys-msec",           MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
     { "fd-usage",               MetricDefinition(PrometheusMetricType::gauge,   "Number of currently used file descriptors")},
     { "dyn-blocked",            MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
     { "dyn-block-nmg-size",     MetricDefinition(PrometheusMetricType::gauge,   "Number of dynamic blocks entries") },
     { "security-status",        MetricDefinition(PrometheusMetricType::gauge,   "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
   };
 };
 403
 404 extern MetricDefinitionStorage g_metricDefinitions;
 405 extern struct DNSDistStats g_stats;
 406 void doLatencyStats(double udiff);
 407
 408
 409 struct StopWatch
 410 {
 411   StopWatch(bool realTime=false): d_needRealTime(realTime)
 412   {
 413   }
 414   struct timespec d_start{0,0};
 415   bool d_needRealTime{false};
 416
 417   void start() {
 418     if(gettime(&d_start, d_needRealTime) < 0)
 419       unixDie("Getting timestamp");
 420
 421   }
 422
 423   void set(const struct timespec& from) {
 424     d_start = from;
 425   }
 426
 427   double udiff() const {
 428     struct timespec now;
 429     if(gettime(&now, d_needRealTime) < 0)
 430       unixDie("Getting timestamp");
 431
 432     return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
 433   }
 434
 435   double udiffAndSet() {
 436     struct timespec now;
 437     if(gettime(&now, d_needRealTime) < 0)
 438       unixDie("Getting timestamp");
 439
 440     auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
 441     d_start = now;
 442     return ret;
 443   }
 444
 445 };
 446
 447 class BasicQPSLimiter
 448 {
 449 public:
 450   BasicQPSLimiter()
 451   {
 452   }
 453
 454   BasicQPSLimiter(unsigned int burst): d_tokens(burst)
 455   {
 456     d_prev.start();
 457   }
 458
 459   bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
 460   {
 461     auto delta = d_prev.udiffAndSet();
 462
 463     if(delta > 0.0) // time, frequently, does go backwards..
 464       d_tokens += 1.0 * rate * (delta/1000000.0);
 465
 466     if(d_tokens > burst) {
 467       d_tokens = burst;
 468     }
 469
 470     bool ret=false;
 471     if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
 472       ret=true;
 473       --d_tokens;
 474     }
 475
 476     return ret;
 477   }
 478
 479   bool seenSince(const struct timespec& cutOff) const
 480   {
 481     return cutOff < d_prev.d_start;
 482   }
 483
 484 protected:
 485   mutable StopWatch d_prev;
 486   mutable double d_tokens;
 487 };
 488
 489 class QPSLimiter : public BasicQPSLimiter
 490 {
 491 public:
 492   QPSLimiter(): BasicQPSLimiter()
 493   {
 494   }
 495
 496   QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
 497   {
 498     d_prev.start();
 499   }
 500
 501   unsigned int getRate() const
 502   {
 503     return d_passthrough ? 0 : d_rate;
 504   }
 505
 506   int getPassed() const
 507   {
 508     return d_passed;
 509   }
 510
 511   int getBlocked() const
 512   {
 513     return d_blocked;
 514   }
 515
 516   bool check() const // this is not quite fair
 517   {
 518     if (d_passthrough) {
 519       return true;
 520     }
 521
 522     bool ret = BasicQPSLimiter::check(d_rate, d_burst);
 523     if (ret) {
 524       d_passed++;
 525     }
 526     else {
 527       d_blocked++;
 528     }
 529
 530     return ret;
 531   }
 532 private:
 533   mutable unsigned int d_passed{0};
 534   mutable unsigned int d_blocked{0};
 535   unsigned int d_rate;
 536   unsigned int d_burst;
 537   bool d_passthrough{true};
 538 };
 539
 540 struct ClientState;
 541
 542 struct IDState
 543 {
 544   IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
 545   IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
 546   {
 547     usageIndicator.store(orig.usageIndicator.load());
 548     origFD = orig.origFD;
 549     origID = orig.origID;
 550     delayMsec = orig.delayMsec;
 551     tempFailureTTL = orig.tempFailureTTL;
 552   }
 553
 554   static const int64_t unusedIndicator = -1;
 555
 556   static bool isInUse(int64_t usageIndicator)
 557   {
 558     return usageIndicator != unusedIndicator;
 559   }
 560
 561   bool isInUse() const
 562   {
 563     return usageIndicator != unusedIndicator;
 564   }
 565
 566   /* return true if the value has been successfully replaced meaning that
 567      no-one updated the usage indicator in the meantime */
 568   bool tryMarkUnused(int64_t expectedUsageIndicator)
 569   {
 570     return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
 571   }
 572
 573   /* mark as unused no matter what, return true if the state was in use before */
 574   bool markAsUsed()
 575   {
 576     auto currentGeneration = generation++;
 577     return markAsUsed(currentGeneration);
 578   }
 579
 580   /* mark as unused no matter what, return true if the state was in use before */
 581   bool markAsUsed(int64_t currentGeneration)
 582   {
 583     int64_t oldUsage = usageIndicator.exchange(currentGeneration);
 584     return oldUsage != unusedIndicator;
 585   }
 586
 587   /* We use this value to detect whether this state is in use.
 588      For performance reasons we don't want to use a lock here, but that means
 589      we need to be very careful when modifying this value. Modifications happen
 590      from:
 591      - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
 592        then picking one of the states associated to this backend (via the idOffset).
 593        Most of the time this state should not be in use and usageIndicator is -1, but we
 594        might not yet have received a response for the query previously associated to this
 595        state, meaning that we will 'reuse' this state and erase the existing state.
 596        If we ever receive a response for this state, it will be discarded. This is
 597        mostly fine for UDP except that we still need to be careful in order to miss
 598        the 'outstanding' counters, which should only be increased when we are picking
 599        an empty state, and not when reusing ;
 600        For DoH, though, we have dynamically allocated a DOHUnit object that needs to
 601        be freed, as well as internal objects internals to libh2o.
 602      - one of the UDP receiver threads receiving a response from a backend, picking
 603        the corresponding state and sending the response to the client ;
 604      - the 'healthcheck' thread scanning the states to actively discover timeouts,
 605        mostly to keep some counters like the 'outstanding' one sane.
 606      We previously based that logic on the origFD (FD on which the query was received,
 607      and therefore from where the response should be sent) but this suffered from an
 608      ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
 609      same value since we only have so much incoming sockets:
 610      - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
 611      - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
 612        qtype and qclass match
 613      - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
 614      - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
 615        5, except it's not the same 5 anymore and it overrides a fresh state.
 616      We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
 617      wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
 618      when the state is unused and the value of our counter otherwise.
 619   */
 620   std::atomic<int64_t> usageIndicator{unusedIndicator};  // set to unusedIndicator to indicate this state is empty   // 8
 621   std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue    // 4
 622   ComboAddress origRemote;                                    // 28
 623   ComboAddress origDest;                                      // 28
 624   StopWatch sentTime;                                         // 16
 625   DNSName qname;                                              // 80
 626   std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
 627 #ifdef HAVE_PROTOBUF
 628   boost::optional<boost::uuids::uuid> uniqueId;
 629 #endif
 630   boost::optional<Netmask> subnet{boost::none};
 631   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
 632   std::shared_ptr<QTag> qTag{nullptr};
 633   const ClientState* cs{nullptr};
 634   DOHUnit* du{nullptr};
 635   uint32_t cacheKey;                                          // 4
 636   uint32_t cacheKeyNoECS;                                     // 4
 637   uint16_t age;                                               // 4
 638   uint16_t qtype;                                             // 2
 639   uint16_t qclass;                                            // 2
 640   uint16_t origID;                                            // 2
 641   uint16_t origFlags;                                         // 2
 642   int origFD{-1};
 643   int delayMsec;
 644   boost::optional<uint32_t> tempFailureTTL;
 645   bool ednsAdded{false};
 646   bool ecsAdded{false};
 647   bool skipCache{false};
 648   bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
 649   bool dnssecOK{false};
 650   bool useZeroScope;
 651 };
 652
 /* number of queries seen, per key */
 using QueryCountRecords = std::unordered_map<std::string, unsigned int>;
 /* Callback invoked with a query and returning a (bool, string) tuple --
    presumably whether the query should be counted, and under which key;
    verify against the callers. */
 using QueryCountFilter = std::function<std::tuple<bool, std::string>(const struct DNSQuestion* dq)>;

 /* Query counting state: the records, an optional filter, and the
    read-write lock stored next to them. */
 struct QueryCount {
   QueryCount()
   {
     pthread_rwlock_init(&queryLock, nullptr);
   }
   ~QueryCount()
   {
     pthread_rwlock_destroy(&queryLock);
   }
   /* The object owns a raw pthread lock: copying it would duplicate the lock
      object and destroy it twice, so copies are not allowed. */
   QueryCount(const QueryCount&) = delete;
   QueryCount& operator=(const QueryCount&) = delete;

   QueryCountRecords records;
   QueryCountFilter filter;
   pthread_rwlock_t queryLock;
   bool enabled{false};
 };

 extern QueryCount g_qcount;
 671
 672 struct ClientState
 673 {
 674   ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
 675   {
 676   }
 677
 678   std::set<int> cpus;
 679   ComboAddress local;
 680   std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
 681   std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
 682   std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
 683   std::string interface;
 684   std::atomic<uint64_t> queries{0};
 685   mutable std::atomic<uint64_t> responses{0};
 686   std::atomic<uint64_t> tcpDiedReadingQuery{0};
 687   std::atomic<uint64_t> tcpDiedSendingResponse{0};
 688   std::atomic<uint64_t> tcpGaveUp{0};
 689   std::atomic<uint64_t> tcpClientTimeouts{0};
 690   std::atomic<uint64_t> tcpDownstreamTimeouts{0};
 691   std::atomic<uint64_t> tcpCurrentConnections{0};
 692   std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
 693   std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
 694   std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
 695   std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
 696   std::atomic<uint64_t> tls10queries{0};   // valid DNS queries received via TLSv1.0
 697   std::atomic<uint64_t> tls11queries{0};   // valid DNS queries received via TLSv1.1
 698   std::atomic<uint64_t> tls12queries{0};   // valid DNS queries received via TLSv1.2
 699   std::atomic<uint64_t> tls13queries{0};   // valid DNS queries received via TLSv1.3
 700   std::atomic<uint64_t> tlsUnknownqueries{0};   // valid DNS queries received via unknown TLS version
 701   std::atomic<double> tcpAvgQueriesPerConnection{0.0};
 702   /* in ms */
 703   std::atomic<double> tcpAvgConnectionDuration{0.0};
 704   int udpFD{-1};
 705   int tcpFD{-1};
 706   int fastOpenQueueSize{0};
 707   bool muted{false};
 708   bool tcp;
 709   bool reuseport;
 710   bool ready{false};
 711
 712   int getSocket() const
 713   {
 714     return udpFD != -1 ? udpFD : tcpFD;
 715   }
 716
 717   bool isUDP() const
 718   {
 719     return udpFD != -1;
 720   }
 721
 722   bool isTCP() const
 723   {
 724     return udpFD == -1;
 725   }
 726
 727   bool hasTLS() const
 728   {
 729     return tlsFrontend != nullptr || dohFrontend != nullptr;
 730   }
 731
 732   std::string getType() const
 733   {
 734     std::string result = udpFD != -1 ? "UDP" : "TCP";
 735
 736     if (dohFrontend) {
 737       result += " (DNS over HTTPS)";
 738     }
 739     else if (tlsFrontend) {
 740       result += " (DNS over TLS)";
 741     }
 742     else if (dnscryptCtx) {
 743       result += " (DNSCrypt)";
 744     }
 745
 746     return result;
 747   }
 748
 749 #ifdef HAVE_EBPF
 750   shared_ptr<BPFFilter> d_filter;
 751
 752   void detachFilter()
 753   {
 754     if (d_filter) {
 755       d_filter->removeSocket(getSocket());
 756       d_filter = nullptr;
 757     }
 758   }
 759
 760   void attachFilter(shared_ptr<BPFFilter> bpf)
 761   {
 762     detachFilter();
 763
 764     bpf->addSocket(getSocket());
 765     d_filter = bpf;
 766   }
 767 #endif /* HAVE_EBPF */
 768
 769   void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
 770   {
 771     tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
 772     tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
 773   }
 774 };
 775
 776 class TCPClientCollection {
 777   std::vector<int> d_tcpclientthreads;
 778   std::atomic<uint64_t> d_numthreads{0};
 779   std::atomic<uint64_t> d_pos{0};
 780   std::atomic<uint64_t> d_queued{0};
 781   const uint64_t d_maxthreads{0};
 782   std::mutex d_mutex;
 783   int d_singlePipe[2];
 784   const bool d_useSinglePipe;
 785 public:
 786
 787   TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
 788
 789   {
 790     d_tcpclientthreads.reserve(maxThreads);
 791
 792     if (d_useSinglePipe) {
 793       if (pipe(d_singlePipe) < 0) {
 794         int err = errno;
 795         throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
 796       }
 797
 798       if (!setNonBlocking(d_singlePipe[0])) {
 799         int err = errno;
 800         close(d_singlePipe[0]);
 801         close(d_singlePipe[1]);
 802         throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
 803       }
 804
 805       if (!setNonBlocking(d_singlePipe[1])) {
 806         int err = errno;
 807         close(d_singlePipe[0]);
 808         close(d_singlePipe[1]);
 809         throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
 810       }
 811     }
 812   }
 813   int getThread()
 814   {
 815     uint64_t pos = d_pos++;
 816     ++d_queued;
 817     return d_tcpclientthreads[pos % d_numthreads];
 818   }
 819   bool hasReachedMaxThreads() const
 820   {
 821     return d_numthreads >= d_maxthreads;
 822   }
 823   uint64_t getThreadsCount() const
 824   {
 825     return d_numthreads;
 826   }
 827   uint64_t getQueuedCount() const
 828   {
 829     return d_queued;
 830   }
 831   void decrementQueuedCount()
 832   {
 833     --d_queued;
 834   }
 835   void addTCPClientThread();
 836 };
 837
/* Global TCP client thread collection, defined elsewhere. */
extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
 839
 840 struct DownstreamState
 841 {
 842    typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
 843
 844   DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
 845   DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
 846   ~DownstreamState()
 847   {
 848     for (auto& fd : sockets) {
 849       if (fd >= 0) {
 850         close(fd);
 851         fd = -1;
 852       }
 853     }
 854     pthread_rwlock_destroy(&d_lock);
 855   }
 856   boost::uuids::uuid id;
 857   std::set<unsigned int> hashes;
 858   mutable pthread_rwlock_t d_lock;
 859   std::vector<int> sockets;
 860   const std::string sourceItfName;
 861   std::mutex socketsLock;
 862   std::mutex connectLock;
 863   std::unique_ptr<FDMultiplexer> mplexer{nullptr};
 864   std::thread tid;
 865   const ComboAddress remote;
 866   QPSLimiter qps;
 867   vector<IDState> idStates;
 868   const ComboAddress sourceAddr;
 869   checkfunc_t checkFunction;
 870   DNSName checkName{"a.root-servers.net."};
 871   QType checkType{QType::A};
 872   uint16_t checkClass{QClass::IN};
 873   std::atomic<uint64_t> idOffset{0};
 874   std::atomic<uint64_t> sendErrors{0};
 875   std::atomic<uint64_t> outstanding{0};
 876   std::atomic<uint64_t> reuseds{0};
 877   std::atomic<uint64_t> queries{0};
 878   std::atomic<uint64_t> responses{0};
 879   struct {
 880     std::atomic<uint64_t> sendErrors{0};
 881     std::atomic<uint64_t> reuseds{0};
 882     std::atomic<uint64_t> queries{0};
 883   } prev;
 884   std::atomic<uint64_t> tcpDiedSendingQuery{0};
 885   std::atomic<uint64_t> tcpDiedReadingResponse{0};
 886   std::atomic<uint64_t> tcpGaveUp{0};
 887   std::atomic<uint64_t> tcpReadTimeouts{0};
 888   std::atomic<uint64_t> tcpWriteTimeouts{0};
 889   std::atomic<uint64_t> tcpCurrentConnections{0};
 890   std::atomic<double> tcpAvgQueriesPerConnection{0.0};
 891   /* in ms */
 892   std::atomic<double> tcpAvgConnectionDuration{0.0};
 893   string name;
 894   size_t socketsOffset{0};
 895   double queryLoad{0.0};
 896   double dropRate{0.0};
 897   double latencyUsec{0.0};
 898   int order{1};
 899   int weight{1};
 900   int tcpConnectTimeout{5};
 901   int tcpRecvTimeout{30};
 902   int tcpSendTimeout{30};
 903   unsigned int checkInterval{1};
 904   unsigned int lastCheck{0};
 905   const unsigned int sourceItf{0};
 906   uint16_t retries{5};
 907   uint16_t xpfRRCode{0};
 908   uint16_t checkTimeout{1000}; /* in milliseconds */
 909   uint8_t currentCheckFailures{0};
 910   uint8_t consecutiveSuccessfulChecks{0};
 911   uint8_t maxCheckFailures{1};
 912   uint8_t minRiseSuccesses{1};
 913   StopWatch sw;
 914   set<string> pools;
 915   enum class Availability { Up, Down, Auto} availability{Availability::Auto};
 916   bool mustResolve{false};
 917   bool upStatus{false};
 918   bool useECS{false};
 919   bool setCD{false};
 920   bool disableZeroScope{false};
 921   std::atomic<bool> connected{false};
 922   std::atomic_flag threadStarted;
 923   bool tcpFastOpen{false};
 924   bool ipBindAddrNoPort{true};
 925
 926   bool isUp() const
 927   {
 928     if(availability == Availability::Down)
 929       return false;
 930     if(availability == Availability::Up)
 931       return true;
 932     return upStatus;
 933   }
 934   void setUp() { availability = Availability::Up; }
 935   void setDown() { availability = Availability::Down; }
 936   void setAuto() { availability = Availability::Auto; }
 937   string getName() const {
 938     if (name.empty()) {
 939       return remote.toStringWithPort();
 940     }
 941     return name;
 942   }
 943   string getNameWithAddr() const {
 944     if (name.empty()) {
 945       return remote.toStringWithPort();
 946     }
 947     return name + " (" + remote.toStringWithPort()+ ")";
 948   }
 949   string getStatus() const
 950   {
 951     string status;
 952     if(availability == DownstreamState::Availability::Up)
 953       status = "UP";
 954     else if(availability == DownstreamState::Availability::Down)
 955       status = "DOWN";
 956     else
 957       status = (upStatus ? "up" : "down");
 958     return status;
 959   }
 960   bool reconnect();
 961   void hash();
 962   void setId(const boost::uuids::uuid& newId);
 963   void setWeight(int newWeight);
 964
 965   void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
 966   {
 967     tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
 968     tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
 969   }
 970 };
/* Vector of shared pointers to downstream server states. */
using servers_t =vector<std::shared_ptr<DownstreamState>>;

/* Vector of (unsigned number, value) pairs. */
template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;

/* Takes a shared reference to a downstream state; defined elsewhere. */
void responderThread(std::shared_ptr<DownstreamState> state);
extern std::mutex g_luamutex;
extern LuaContext g_lua;
extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
 979
 980 class DNSRule
 981 {
 982 public:
 983   virtual ~DNSRule ()
 984   {
 985   }
 986   virtual bool matches(const DNSQuestion* dq) const =0;
 987   virtual string toString() const = 0;
 988   mutable std::atomic<uint64_t> d_matches{0};
 989 };
 990
/* Numbered list of downstream servers, as passed to the policy functions. */
using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
/* Signature of a server selection function: takes the candidate servers
   and the question, returns a downstream server. */
typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
 993
 994 struct ServerPolicy
 995 {
 996   string name;
 997   policyfunc_t policy;
 998   bool isLua;
 999   std::string toString() const {
1000     return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
1001   }
1002 };
1003
1004 struct ServerPool
1005 {
1006   ServerPool()
1007   {
1008     pthread_rwlock_init(&d_lock, nullptr);
1009   }
1010   ~ServerPool()
1011   {
1012     pthread_rwlock_destroy(&d_lock);
1013   }
1014
1015   const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
1016
1017   bool getECS() const
1018   {
1019     return d_useECS;
1020   }
1021
1022   void setECS(bool useECS)
1023   {
1024     d_useECS = useECS;
1025   }
1026
1027   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
1028   std::shared_ptr<ServerPolicy> policy{nullptr};
1029
1030   size_t countServers(bool upOnly)
1031   {
1032     size_t count = 0;
1033     ReadLock rl(&d_lock);
1034     for (const auto& server : d_servers) {
1035       if (!upOnly || std::get<1>(server)->isUp() ) {
1036         count++;
1037       }
1038     }
1039     return count;
1040   }
1041
1042   NumberedVector<shared_ptr<DownstreamState>> getServers()
1043   {
1044     NumberedVector<shared_ptr<DownstreamState>> result;
1045     {
1046       ReadLock rl(&d_lock);
1047       result = d_servers;
1048     }
1049     return result;
1050   }
1051
1052   void addServer(shared_ptr<DownstreamState>& server)
1053   {
1054     WriteLock wl(&d_lock);
1055     unsigned int count = (unsigned int) d_servers.size();
1056     d_servers.push_back(make_pair(++count, server));
1057     /* we need to reorder based on the server 'order' */
1058     std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1059       return a.second->order < b.second->order;
1060     });
1061     /* and now we need to renumber for Lua (custom policies) */
1062     size_t idx = 1;
1063     for (auto& serv : d_servers) {
1064       serv.first = idx++;
1065     }
1066   }
1067
1068   void removeServer(shared_ptr<DownstreamState>& server)
1069   {
1070     WriteLock wl(&d_lock);
1071     size_t idx = 1;
1072     bool found = false;
1073     for (auto it = d_servers.begin(); it != d_servers.end();) {
1074       if (found) {
1075         /* we need to renumber the servers placed
1076            after the removed one, for Lua (custom policies) */
1077         it->first = idx++;
1078         it++;
1079       }
1080       else if (it->second == server) {
1081         it = d_servers.erase(it);
1082         found = true;
1083       } else {
1084         idx++;
1085         it++;
1086       }
1087     }
1088   }
1089
1090 private:
1091   NumberedVector<shared_ptr<DownstreamState>> d_servers;
1092   pthread_rwlock_t d_lock;
1093   bool d_useECS{false};
1094 };
/* Map from a string key (the pool name, judging by the helpers below) to a shared pool. */
using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
/* Pool helpers operating on a pools_t by pool name; defined elsewhere. */
void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1099
/* Settings of one Carbon target: the server address, the three name
   components and an interval (unit not visible here -- presumably seconds,
   to be confirmed against the code consuming it). */
struct CarbonConfig
{
  ComboAddress server;
  std::string namespace_name;
  std::string ourname;
  std::string instance_name;
  unsigned int interval;
};
1108
/* Flag values for the EDNS header: none, and the DO flag, which is the
   most significant bit of a 16-bit value. */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000 /* 32768 */
};
1113
/* Associates a rule with the action attached to it, plus an identifier
   and a creation order value. */
struct DNSDistRuleAction
{
  std::shared_ptr<DNSRule> d_rule;
  std::shared_ptr<DNSAction> d_action;
  boost::uuids::uuid d_id;
  uint64_t d_creationOrder;
};
1121
/* Same layout as DNSDistRuleAction, except that the action is a
   DNSResponseAction. */
struct DNSDistResponseRuleAction
{
  std::shared_ptr<DNSRule> d_rule;
  std::shared_ptr<DNSResponseAction> d_action;
  boost::uuids::uuid d_id;
  uint64_t d_creationOrder;
};
1129
/* Dynamic block state */
extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
extern DNSAction::Action g_dynBlockAction;

/* Global state holders; LocalHolders takes a local view of several of them
   via getLocal(). */
extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
extern GlobalStateHolder<ServerPolicy> g_policy;
extern GlobalStateHolder<servers_t> g_dstates;
extern GlobalStateHolder<pools_t> g_pools;
extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
extern GlobalStateHolder<NetmaskGroup> g_ACL;

extern ComboAddress g_serverControl; // not changed during runtime

/* Frontends */
extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
extern std::vector<std::unique_ptr<ClientState>> g_frontends;
/* Plain global settings, defined elsewhere */
extern bool g_truncateTC;
extern bool g_fixupCase;
extern int g_tcpRecvTimeout;
extern int g_tcpSendTimeout;
extern int g_udpTimeout;
extern uint16_t g_maxOutstanding;
extern std::atomic<bool> g_configurationDone;
extern uint64_t g_maxTCPClientThreads;
extern uint64_t g_maxTCPQueuedConnections;
extern size_t g_maxTCPQueriesPerConn;
extern size_t g_maxTCPConnectionDuration;
extern size_t g_maxTCPConnectionsPerClient;
extern std::atomic<uint16_t> g_cacheCleaningDelay;
extern std::atomic<uint16_t> g_cacheCleaningPercentage;
extern uint32_t g_staleCacheEntriesTTL;
extern bool g_apiReadWrite;
extern std::string g_apiConfigDirectory;
extern bool g_servFailOnNoPolicy;
extern uint32_t g_hashperturb;
extern bool g_useTCPSinglePipe;
extern uint16_t g_downstreamTCPCleanupInterval;
extern size_t g_udpVectorSize;
extern bool g_preserveTrailingData;
extern bool g_allowEmptyResponse;
extern bool g_roundrobinFailOnNoServer;
extern double g_consistentHashBalancingFactor;

/* Only available when built with eBPF support */
#ifdef HAVE_EBPF
extern shared_ptr<BPFFilter> g_defaultBPFFilter;
extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
#endif /* HAVE_EBPF */
1180
1181 struct LocalHolders
1182 {
1183   LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1184   {
1185   }
1186
1187   LocalStateHolder<NetmaskGroup> acl;
1188   LocalStateHolder<ServerPolicy> policy;
1189   LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1190   LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1191   LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1192   LocalStateHolder<servers_t> servers;
1193   LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1194   LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1195   LocalStateHolder<pools_t> pools;
1196 };
1197
struct dnsheader;

void controlThread(int fd, ComboAddress local);
/* Pool lookup helpers, by pool name; defined elsewhere. */
std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);

/* Server selection functions; they all share the parameter list and return
   type of policyfunc_t. */
std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);

std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
1212
/* Web server settings: password, API key and optional custom headers,
   together with a mutex (presumably guarding the other members -- the
   locking itself is not visible here). */
struct WebserverConfig
{
  std::string password;
  std::string apiKey;
  boost::optional<std::map<std::string, std::string> > customHeaders;
  std::mutex lock;
};
1220
/* Web server settings setters; defined elsewhere. */
void setWebserverAPIKey(const boost::optional<std::string> apiKey);
void setWebserverPassword(const std::string& password);
void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);

/* Thread entry points; defined elsewhere. */
void dnsdistWebserverThread(int sock, const ComboAddress& local);
void tcpAcceptorThread(void* p);
#ifdef HAVE_DNS_OVER_HTTPS
void dohThread(ClientState* cs);
#endif /* HAVE_DNS_OVER_HTTPS */

/* Tracking of Lua side effects */
void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
void setLuaSideEffect();   // set to report a side effect, cancelling all _no_ side effect calls
bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
void resetLuaSideEffect(); // reset to indeterminate state

/* Query and response processing helpers; defined elsewhere. */
bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);

bool checkQueryHeaders(const struct dnsheader* dh);

/* DNSCrypt */
extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);

bool addXPF(DNSQuestion& dq, uint16_t optionCode);

uint16_t getRandomDNSID();
1249
1250 #include "dnsdist-snmp.hh"
1251
/* SNMP-related globals, defined elsewhere */
extern bool g_snmpEnabled;
extern bool g_snmpTrapsEnabled;
extern DNSDistSNMPAgent* g_snmpAgent;
extern bool g_addEDNSToSelfGeneratedResponses;

extern std::set<std::string> g_capabilitiesToRetain;
static const uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
static const size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value

/* Possible outcomes of processQuery(): drop the query, send an answer,
   or pass the query to a backend. */
enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);

/* Conversions between IDState and DNS question / response objects; defined elsewhere. */
DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);

/* Backend communication helpers; healthCheck defaults to false. */
int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);