pdns/dnsdist.hh

   1 /*
   2  * This file is part of PowerDNS or dnsdist.
   3  * Copyright -- PowerDNS.COM B.V. and its contributors
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of version 2 of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * In addition, for the avoidance of any doubt, permission is granted to
  10  * link this program with OpenSSL and to (re)distribute the binaries
  11  * produced as the result of such linking.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22 #pragma once
  23 #include "config.h"
  24 #include "ext/luawrapper/include/LuaContext.hpp"
  25
  26 #include <atomic>
  27 #include <mutex>
  28 #include <string>
  29 #include <thread>
  30 #include <time.h>
  31 #include <unistd.h>
  32 #include <unordered_map>
  33
  34 #include <boost/variant.hpp>
  35
  36 #include "bpf-filter.hh"
  37 #include "capabilities.hh"
  38 #include "circular_buffer.hh"
  39 #include "dnscrypt.hh"
  40 #include "dnsdist-cache.hh"
  41 #include "dnsdist-dynbpf.hh"
  42 #include "dnsname.hh"
  43 #include "doh.hh"
  44 #include "ednsoptions.hh"
  45 #include "gettime.hh"
  46 #include "iputils.hh"
  47 #include "misc.hh"
  48 #include "mplexer.hh"
  49 #include "sholder.hh"
  50 #include "tcpiohandler.hh"
  51 #include "uuid-utils.hh"
  52
/* Declared here, defined in another translation unit.
   uptimeOfProcess() is registered below in DNSDistStats as the "uptime" metric. */
void carbonDumpThread();
uint64_t uptimeOfProcess(const std::string& str);

/* Process-wide EDNS Client Subnet settings. The DNSQuestion constructor copies
   them into every new question: the source prefix length is picked according
   to the address family of the client, and the override flag is taken as-is. */
extern uint16_t g_ECSSourcePrefixV4;
extern uint16_t g_ECSSourcePrefixV6;
extern bool g_ECSOverride;

/* String key/value tags; DNSQuestion and IDState each hold a shared pointer to one. */
typedef std::unordered_map<string, string> QTag;
  61
  62 struct DNSQuestion
  63 {
  64   DNSQuestion(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t queryLen, bool isTcp, const struct timespec* queryTime_):
  65     qname(name), local(lc), remote(rem), dh(header), queryTime(queryTime_), size(bufferSize), consumed(consumed_), tempFailureTTL(boost::none), qtype(type), qclass(class_), len(queryLen), ecsPrefixLength(rem->sin4.sin_family == AF_INET ? g_ECSSourcePrefixV4 : g_ECSSourcePrefixV6), tcp(isTcp), ecsOverride(g_ECSOverride) {
  66     const uint16_t* flags = getFlagsFromDNSHeader(dh);
  67     origFlags = *flags;
  68   }
  69   DNSQuestion(const DNSQuestion&) = delete;
  70   DNSQuestion& operator=(const DNSQuestion&) = delete;
  71   DNSQuestion(DNSQuestion&&) = default;
  72
  73 #ifdef HAVE_PROTOBUF
  74   boost::optional<boost::uuids::uuid> uniqueId;
  75 #endif
  76   Netmask ecs;
  77   boost::optional<Netmask> subnet;
  78   std::string sni; /* Server Name Indication, if any (DoT or DoH) */
  79   std::string poolname;
  80   const DNSName* qname{nullptr};
  81   const ComboAddress* local{nullptr};
  82   const ComboAddress* remote{nullptr};
  83   std::shared_ptr<QTag> qTag{nullptr};
  84   std::shared_ptr<std::map<uint16_t, EDNSOptionView> > ednsOptions;
  85   std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
  86   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
  87   struct dnsheader* dh{nullptr};
  88   const struct timespec* queryTime{nullptr};
  89   struct DOHUnit* du{nullptr};
  90   size_t size;
  91   unsigned int consumed{0};
  92   int delayMsec{0};
  93   boost::optional<uint32_t> tempFailureTTL;
  94   uint32_t cacheKeyNoECS;
  95   uint32_t cacheKey;
  96   const uint16_t qtype;
  97   const uint16_t qclass;
  98   uint16_t len;
  99   uint16_t ecsPrefixLength;
 100   uint16_t origFlags;
 101   uint8_t ednsRCode{0};
 102   const bool tcp;
 103   bool skipCache{false};
 104   bool ecsOverride;
 105   bool useECS{true};
 106   bool addXPF{true};
 107   bool ecsSet{false};
 108   bool ecsAdded{false};
 109   bool ednsAdded{false};
 110   bool useZeroScope{false};
 111   bool dnssecOK{false};
 112 };
 113
 114 struct DNSResponse : DNSQuestion
 115 {
 116   DNSResponse(const DNSName* name, uint16_t type, uint16_t class_, unsigned int consumed_, const ComboAddress* lc, const ComboAddress* rem, struct dnsheader* header, size_t bufferSize, uint16_t responseLen, bool isTcp, const struct timespec* queryTime_):
 117     DNSQuestion(name, type, class_, consumed_, lc, rem, header, bufferSize, responseLen, isTcp, queryTime_) { }
 118   DNSResponse(const DNSResponse&) = delete;
 119   DNSResponse& operator=(const DNSResponse&) = delete;
 120   DNSResponse(DNSResponse&&) = default;
 121 };
 122
 123 /* so what could you do:
 124    drop,
 125    fake up nxdomain,
 126    provide actual answer,
   allow and stop processing,
 128    continue processing,
 129    modify header:    (servfail|refused|notimp), set TC=1,
 130    send to pool */
 131
 132 class DNSAction
 133 {
 134 public:
 135   enum class Action { Drop, Nxdomain, Refused, Spoof, Allow, HeaderModify, Pool, Delay, Truncate, ServFail, None, NoOp, NoRecurse };
 136   static std::string typeToString(const Action& action)
 137   {
 138     switch(action) {
 139     case Action::Drop:
 140       return "Drop";
 141     case Action::Nxdomain:
 142       return "Send NXDomain";
 143     case Action::Refused:
 144       return "Send Refused";
 145     case Action::Spoof:
 146       return "Spoof an answer";
 147     case Action::Allow:
 148       return "Allow";
 149     case Action::HeaderModify:
 150       return "Modify the header";
 151     case Action::Pool:
 152       return "Route to a pool";
 153     case Action::Delay:
 154       return "Delay";
 155     case Action::Truncate:
 156       return "Truncate over UDP";
 157     case Action::ServFail:
 158       return "Send ServFail";
 159     case Action::None:
 160     case Action::NoOp:
 161       return "Do nothing";
 162     case Action::NoRecurse:
 163       return "Set rd=0";
 164     }
 165
 166     return "Unknown";
 167   }
 168
 169   virtual Action operator()(DNSQuestion*, string* ruleresult) const =0;
 170   virtual ~DNSAction()
 171   {
 172   }
 173   virtual string toString() const = 0;
 174   virtual std::map<string, double> getStats() const
 175   {
 176     return {{}};
 177   }
 178 };
 179
 180 class DNSResponseAction
 181 {
 182 public:
 183   enum class Action { Allow, Delay, Drop, HeaderModify, ServFail, None };
 184   virtual Action operator()(DNSResponse*, string* ruleresult) const =0;
 185   virtual ~DNSResponseAction()
 186   {
 187   }
 188   virtual string toString() const = 0;
 189 };
 190
 191 struct DynBlock
 192 {
 193   DynBlock(): action(DNSAction::Action::None), warning(false)
 194   {
 195   }
 196
 197   DynBlock(const std::string& reason_, const struct timespec& until_, const DNSName& domain_, DNSAction::Action action_): reason(reason_), until(until_), domain(domain_), action(action_), warning(false)
 198   {
 199   }
 200
 201   DynBlock(const DynBlock& rhs): reason(rhs.reason), until(rhs.until), domain(rhs.domain), action(rhs.action), warning(rhs.warning)
 202   {
 203     blocks.store(rhs.blocks);
 204   }
 205
 206   DynBlock& operator=(const DynBlock& rhs)
 207   {
 208     reason=rhs.reason;
 209     until=rhs.until;
 210     domain=rhs.domain;
 211     action=rhs.action;
 212     blocks.store(rhs.blocks);
 213     warning=rhs.warning;
 214     return *this;
 215   }
 216
 217   string reason;
 218   struct timespec until;
 219   DNSName domain;
 220   DNSAction::Action action;
 221   mutable std::atomic<unsigned int> blocks;
 222   bool warning;
 223 };
 224
/* Dynamic blocks stored in a netmask tree; DNSDistStats reports its size as
   the "dyn-block-nmg-size" metric. */
extern GlobalStateHolder<NetmaskTree<DynBlock>> g_dynblockNMG;

/* NOTE(review): presumably a timestamped log of configuration changes —
   confirm against the definition. */
extern vector<pair<struct timeval, std::string> > g_confDelta;

/* Registered in DNSDistStats as the "latency-count" metric. */
extern uint64_t getLatencyCount(const std::string&);
 230
/* Process-wide statistics.
   The counters are atomic 64-bit integers and the latency averages are plain
   doubles. 'entries' is the table of exported metrics: each exported name is
   bound to a pointer to one of the counters, a pointer to one of the doubles,
   or a function computing the value on demand. The pointers refer to members
   of this very object. */
struct DNSDistStats
{
  using stat_t=std::atomic<uint64_t>; // aww yiss ;-)
  stat_t responses{0};
  stat_t servfailResponses{0};
  stat_t queries{0};
  stat_t frontendNXDomain{0};
  stat_t frontendServFail{0};
  stat_t frontendNoError{0};
  stat_t nonCompliantQueries{0};
  stat_t nonCompliantResponses{0};
  stat_t rdQueries{0};
  stat_t emptyQueries{0};
  stat_t aclDrops{0};
  stat_t dynBlocked{0};
  stat_t ruleDrop{0};
  stat_t ruleNXDomain{0};
  stat_t ruleRefused{0};
  stat_t ruleServFail{0};
  stat_t selfAnswered{0};
  stat_t downstreamTimeouts{0};
  stat_t downstreamSendErrors{0};
  stat_t truncFail{0};
  stat_t noPolicy{0};
  stat_t cacheHits{0};
  stat_t cacheMisses{0};
  // latency histogram buckets, plus the running sum
  stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0};
  stat_t securityStatus{0};

  // not atomic, unlike the counters above
  double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0};
  // computes a metric on demand; it is handed a string argument
  typedef std::function<uint64_t(const std::string&)> statfunction_t;
  // the three ways a metric value can be obtained
  typedef boost::variant<stat_t*, double*, statfunction_t> entry_t;
  // exported name -> source of the value, in the order they are listed here
  std::vector<std::pair<std::string, entry_t>> entries{
    {"responses", &responses},
    {"servfail-responses", &servfailResponses},
    {"queries", &queries},
    {"frontend-nxdomain", &frontendNXDomain},
    {"frontend-servfail", &frontendServFail},
    {"frontend-noerror", &frontendNoError},
    {"acl-drops", &aclDrops},
    {"rule-drop", &ruleDrop},
    {"rule-nxdomain", &ruleNXDomain},
    {"rule-refused", &ruleRefused},
    {"rule-servfail", &ruleServFail},
    {"self-answered", &selfAnswered},
    {"downstream-timeouts", &downstreamTimeouts},
    {"downstream-send-errors", &downstreamSendErrors},
    {"trunc-failures", &truncFail},
    {"no-policy", &noPolicy},
    {"latency0-1", &latency0_1},
    {"latency1-10", &latency1_10},
    {"latency10-50", &latency10_50},
    {"latency50-100", &latency50_100},
    {"latency100-1000", &latency100_1000},
    {"latency-slow", &latencySlow},
    {"latency-avg100", &latencyAvg100},
    {"latency-avg1000", &latencyAvg1000},
    {"latency-avg10000", &latencyAvg10000},
    {"latency-avg1000000", &latencyAvg1000000},
    {"uptime", uptimeOfProcess},
    {"real-memory-usage", getRealMemoryUsage},
    {"special-memory-usage", getSpecialMemoryUsage},
    {"noncompliant-queries", &nonCompliantQueries},
    {"noncompliant-responses", &nonCompliantResponses},
    {"rdqueries", &rdQueries},
    {"empty-queries", &emptyQueries},
    {"cache-hits", &cacheHits},
    {"cache-misses", &cacheMisses},
    {"cpu-user-msec", getCPUTimeUser},
    {"cpu-sys-msec", getCPUTimeSystem},
    {"fd-usage", getOpenFileDescriptors},
    {"dyn-blocked", &dynBlocked},
    // computed on demand from the current dynamic blocks tree
    {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
    {"security-status", &securityStatus},
    // Latency histogram
    {"latency-sum", &latencySum},
    {"latency-count", getLatencyCount},
  };
};
 310
 311 // Metric types for Prometheus
 312 enum class PrometheusMetricType: int {
 313     counter = 1,
 314     gauge = 2
 315 };
 316
 317 // Keeps additional information about metrics
 318 struct MetricDefinition {
 319   MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
 320   }
 321
 322   MetricDefinition() = default;
 323
 324   // Metric description
 325   std::string description;
 326   // Metric type for Prometheus
 327   PrometheusMetricType prometheusType;
 328 };
 329
 330 struct MetricDefinitionStorage {
 331   // Return metric definition by name
 332   bool getMetricDetails(std::string metricName, MetricDefinition& metric) {
 333   auto metricDetailsIter = metrics.find(metricName);
 334
 335   if (metricDetailsIter == metrics.end()) {
 336     return false;
 337   }
 338
 339   metric = metricDetailsIter->second;
 340     return true;
 341   };
 342
 343   // Return string representation of Prometheus metric type
 344   std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
 345     switch (metricType) {
 346       case PrometheusMetricType::counter:
 347         return "counter";
 348         break;
 349       case PrometheusMetricType::gauge:
 350         return "gauge";
 351         break;
 352       default:
 353         return "";
 354         break;
 355     }
 356   };
 357
 358   std::map<std::string, MetricDefinition> metrics = {
 359     { "responses",              MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
 360     { "servfail-responses",     MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
 361     { "queries",                MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
 362     { "frontend-nxdomain",      MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
 363     { "frontend-servfail",      MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
 364     { "frontend-noerror",       MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
 365     { "acl-drops",              MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
 366     { "rule-drop",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
 367     { "rule-nxdomain",          MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
 368     { "rule-refused",           MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
 369     { "rule-servfail",          MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
 370     { "self-answered",          MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
 371     { "downstream-timeouts",    MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
 372     { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
 373     { "trunc-failures",         MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
 374     { "no-policy",              MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
 375     { "latency0-1",             MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
 376     { "latency1-10",            MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
 377     { "latency10-50",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
 378     { "latency50-100",          MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
 379     { "latency100-1000",        MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
 380     { "latency-slow",           MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
 381     { "latency-avg100",         MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 100 packets")},
 382     { "latency-avg1000",        MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000 packets")},
 383     { "latency-avg10000",       MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 10000 packets")},
 384     { "latency-avg1000000",     MetricDefinition(PrometheusMetricType::gauge,   "Average response latency in microseconds of the last 1000000 packets")},
 385     { "uptime",                 MetricDefinition(PrometheusMetricType::gauge,   "Uptime of the dnsdist process in seconds")},
 386     { "real-memory-usage",      MetricDefinition(PrometheusMetricType::gauge,   "Current memory usage in bytes")},
 387     { "noncompliant-queries",   MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
 388     { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
 389     { "rdqueries",              MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
 390     { "empty-queries",          MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
 391     { "cache-hits",             MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
 392     { "cache-misses",           MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
 393     { "cpu-user-msec",          MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
 394     { "cpu-sys-msec",           MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
 395     { "fd-usage",               MetricDefinition(PrometheusMetricType::gauge,   "Number of currently used file descriptors")},
 396     { "dyn-blocked",            MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
 397     { "dyn-block-nmg-size",     MetricDefinition(PrometheusMetricType::gauge,   "Number of dynamic blocks entries") },
 398     { "security-status",        MetricDefinition(PrometheusMetricType::gauge,   "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
 399   };
 400 };
 401
/* The single instances of the metric descriptions and of the statistics;
   both are defined in another translation unit. */
extern MetricDefinitionStorage g_metricDefinitions;
extern struct DNSDistStats g_stats;
/* NOTE(review): presumably feeds one latency measurement (in microseconds,
   going by the parameter name) into the latency statistics — confirm against
   the definition. */
void doLatencyStats(double udiff);
 405
 406
 407 struct StopWatch
 408 {
 409   StopWatch(bool realTime=false): d_needRealTime(realTime)
 410   {
 411   }
 412   struct timespec d_start{0,0};
 413   bool d_needRealTime{false};
 414
 415   void start() {
 416     if(gettime(&d_start, d_needRealTime) < 0)
 417       unixDie("Getting timestamp");
 418
 419   }
 420
 421   void set(const struct timespec& from) {
 422     d_start = from;
 423   }
 424
 425   double udiff() const {
 426     struct timespec now;
 427     if(gettime(&now, d_needRealTime) < 0)
 428       unixDie("Getting timestamp");
 429
 430     return 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
 431   }
 432
 433   double udiffAndSet() {
 434     struct timespec now;
 435     if(gettime(&now, d_needRealTime) < 0)
 436       unixDie("Getting timestamp");
 437
 438     auto ret= 1000000.0*(now.tv_sec - d_start.tv_sec) + (now.tv_nsec - d_start.tv_nsec)/1000.0;
 439     d_start = now;
 440     return ret;
 441   }
 442
 443 };
 444
 445 class BasicQPSLimiter
 446 {
 447 public:
 448   BasicQPSLimiter()
 449   {
 450   }
 451
 452   BasicQPSLimiter(unsigned int burst): d_tokens(burst)
 453   {
 454     d_prev.start();
 455   }
 456
 457   bool check(unsigned int rate, unsigned int burst) const // this is not quite fair
 458   {
 459     auto delta = d_prev.udiffAndSet();
 460
 461     if(delta > 0.0) // time, frequently, does go backwards..
 462       d_tokens += 1.0 * rate * (delta/1000000.0);
 463
 464     if(d_tokens > burst) {
 465       d_tokens = burst;
 466     }
 467
 468     bool ret=false;
 469     if(d_tokens >= 1.0) { // we need this because burst=1 is weird otherwise
 470       ret=true;
 471       --d_tokens;
 472     }
 473
 474     return ret;
 475   }
 476
 477   bool seenSince(const struct timespec& cutOff) const
 478   {
 479     return cutOff < d_prev.d_start;
 480   }
 481
 482 protected:
 483   mutable StopWatch d_prev;
 484   mutable double d_tokens;
 485 };
 486
 487 class QPSLimiter : public BasicQPSLimiter
 488 {
 489 public:
 490   QPSLimiter(): BasicQPSLimiter()
 491   {
 492   }
 493
 494   QPSLimiter(unsigned int rate, unsigned int burst): BasicQPSLimiter(burst), d_rate(rate), d_burst(burst), d_passthrough(false)
 495   {
 496     d_prev.start();
 497   }
 498
 499   unsigned int getRate() const
 500   {
 501     return d_passthrough ? 0 : d_rate;
 502   }
 503
 504   int getPassed() const
 505   {
 506     return d_passed;
 507   }
 508
 509   int getBlocked() const
 510   {
 511     return d_blocked;
 512   }
 513
 514   bool check() const // this is not quite fair
 515   {
 516     if (d_passthrough) {
 517       return true;
 518     }
 519
 520     bool ret = BasicQPSLimiter::check(d_rate, d_burst);
 521     if (ret) {
 522       d_passed++;
 523     }
 524     else {
 525       d_blocked++;
 526     }
 527
 528     return ret;
 529   }
 530 private:
 531   mutable unsigned int d_passed{0};
 532   mutable unsigned int d_blocked{0};
 533   unsigned int d_rate;
 534   unsigned int d_burst;
 535   bool d_passthrough{true};
 536 };
 537
 538 struct ClientState;
 539
 540 struct IDState
 541 {
 542   IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;}
 543   IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age)
 544   {
 545     usageIndicator.store(orig.usageIndicator.load());
 546     origFD = orig.origFD;
 547     origID = orig.origID;
 548     delayMsec = orig.delayMsec;
 549     tempFailureTTL = orig.tempFailureTTL;
 550   }
 551
 552   static const int64_t unusedIndicator = -1;
 553
 554   static bool isInUse(int64_t usageIndicator)
 555   {
 556     return usageIndicator != unusedIndicator;
 557   }
 558
 559   bool isInUse() const
 560   {
 561     return usageIndicator != unusedIndicator;
 562   }
 563
 564   /* return true if the value has been successfully replaced meaning that
 565      no-one updated the usage indicator in the meantime */
 566   bool tryMarkUnused(int64_t expectedUsageIndicator)
 567   {
 568     return usageIndicator.compare_exchange_strong(expectedUsageIndicator, unusedIndicator);
 569   }
 570
 571   /* mark as unused no matter what, return true if the state was in use before */
 572   bool markAsUsed()
 573   {
 574     auto currentGeneration = generation++;
 575     return markAsUsed(currentGeneration);
 576   }
 577
 578   /* mark as unused no matter what, return true if the state was in use before */
 579   bool markAsUsed(int64_t currentGeneration)
 580   {
 581     int64_t oldUsage = usageIndicator.exchange(currentGeneration);
 582     return oldUsage != unusedIndicator;
 583   }
 584
 585   /* We use this value to detect whether this state is in use.
 586      For performance reasons we don't want to use a lock here, but that means
 587      we need to be very careful when modifying this value. Modifications happen
 588      from:
 589      - one of the UDP or DoH 'client' threads receiving a query, selecting a backend
 590        then picking one of the states associated to this backend (via the idOffset).
 591        Most of the time this state should not be in use and usageIndicator is -1, but we
 592        might not yet have received a response for the query previously associated to this
 593        state, meaning that we will 'reuse' this state and erase the existing state.
 594        If we ever receive a response for this state, it will be discarded. This is
 595        mostly fine for UDP except that we still need to be careful in order to miss
 596        the 'outstanding' counters, which should only be increased when we are picking
 597        an empty state, and not when reusing ;
 598        For DoH, though, we have dynamically allocated a DOHUnit object that needs to
 599        be freed, as well as internal objects internals to libh2o.
 600      - one of the UDP receiver threads receiving a response from a backend, picking
 601        the corresponding state and sending the response to the client ;
 602      - the 'healthcheck' thread scanning the states to actively discover timeouts,
 603        mostly to keep some counters like the 'outstanding' one sane.
 604      We previously based that logic on the origFD (FD on which the query was received,
 605      and therefore from where the response should be sent) but this suffered from an
 606      ABA problem since it was quite likely that a UDP 'client thread' would reset it to the
 607      same value since we only have so much incoming sockets:
 608      - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ;
 609      - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname,
 610        qtype and qclass match
 611      - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ;
 612      - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still
 613        5, except it's not the same 5 anymore and it overrides a fresh state.
 614      We now use a 32-bit unsigned counter instead, which is incremented every time the state is set,
 615      wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1
 616      when the state is unused and the value of our counter otherwise.
 617   */
 618   std::atomic<int64_t> usageIndicator{unusedIndicator};  // set to unusedIndicator to indicate this state is empty   // 8
 619   std::atomic<uint32_t> generation{0}; // increased every time a state is used, to be able to detect an ABA issue    // 4
 620   ComboAddress origRemote;                                    // 28
 621   ComboAddress origDest;                                      // 28
 622   StopWatch sentTime;                                         // 16
 623   DNSName qname;                                              // 80
 624   std::shared_ptr<DNSCryptQuery> dnsCryptQuery{nullptr};
 625 #ifdef HAVE_PROTOBUF
 626   boost::optional<boost::uuids::uuid> uniqueId;
 627 #endif
 628   boost::optional<Netmask> subnet{boost::none};
 629   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
 630   std::shared_ptr<QTag> qTag{nullptr};
 631   const ClientState* cs{nullptr};
 632   DOHUnit* du{nullptr};
 633   uint32_t cacheKey;                                          // 4
 634   uint32_t cacheKeyNoECS;                                     // 4
 635   uint16_t age;                                               // 4
 636   uint16_t qtype;                                             // 2
 637   uint16_t qclass;                                            // 2
 638   uint16_t origID;                                            // 2
 639   uint16_t origFlags;                                         // 2
 640   int origFD{-1};
 641   int delayMsec;
 642   boost::optional<uint32_t> tempFailureTTL;
 643   bool ednsAdded{false};
 644   bool ecsAdded{false};
 645   bool skipCache{false};
 646   bool destHarvested{false}; // if true, origDest holds the original dest addr, otherwise the listening addr
 647   bool dnssecOK{false};
 648   bool useZeroScope;
 649 };
 650
 651 typedef std::unordered_map<string, unsigned int> QueryCountRecords;
 652 typedef std::function<std::tuple<bool, string>(const DNSQuestion* dq)> QueryCountFilter;
 653 struct QueryCount {
 654   QueryCount()
 655   {
 656     pthread_rwlock_init(&queryLock, nullptr);
 657   }
 658   ~QueryCount()
 659   {
 660     pthread_rwlock_destroy(&queryLock);
 661   }
 662   QueryCountRecords records;
 663   QueryCountFilter filter;
 664   pthread_rwlock_t queryLock;
 665   bool enabled{false};
 666 };
 667
 668 extern QueryCount g_qcount;
 669
 670 struct ClientState
 671 {
 672   ClientState(const ComboAddress& local_, bool isTCP_, bool doReusePort, int fastOpenQueue, const std::string& itfName, const std::set<int>& cpus_): cpus(cpus_), local(local_), interface(itfName), fastOpenQueueSize(fastOpenQueue), tcp(isTCP_), reuseport(doReusePort)
 673   {
 674   }
 675
 676   std::set<int> cpus;
 677   ComboAddress local;
 678   std::shared_ptr<DNSCryptContext> dnscryptCtx{nullptr};
 679   std::shared_ptr<TLSFrontend> tlsFrontend{nullptr};
 680   std::shared_ptr<DOHFrontend> dohFrontend{nullptr};
 681   std::string interface;
 682   std::atomic<uint64_t> queries{0};
 683   mutable std::atomic<uint64_t> responses{0};
 684   std::atomic<uint64_t> tcpDiedReadingQuery{0};
 685   std::atomic<uint64_t> tcpDiedSendingResponse{0};
 686   std::atomic<uint64_t> tcpGaveUp{0};
 687   std::atomic<uint64_t> tcpClientTimeouts{0};
 688   std::atomic<uint64_t> tcpDownstreamTimeouts{0};
 689   std::atomic<uint64_t> tcpCurrentConnections{0};
 690   std::atomic<uint64_t> tlsNewSessions{0}; // A new TLS session has been negotiated, no resumption
 691   std::atomic<uint64_t> tlsResumptions{0}; // A TLS session has been resumed, either via session id or via a TLS ticket
 692   std::atomic<uint64_t> tlsUnknownTicketKey{0}; // A TLS ticket has been presented but we don't have the associated key (might have expired)
 693   std::atomic<uint64_t> tlsInactiveTicketKey{0}; // A TLS ticket has been successfully resumed but the key is no longer active, we should issue a new one
 694   std::atomic<uint64_t> tls10queries{0};   // valid DNS queries received via TLSv1.0
 695   std::atomic<uint64_t> tls11queries{0};   // valid DNS queries received via TLSv1.1
 696   std::atomic<uint64_t> tls12queries{0};   // valid DNS queries received via TLSv1.2
 697   std::atomic<uint64_t> tls13queries{0};   // valid DNS queries received via TLSv1.3
 698   std::atomic<uint64_t> tlsUnknownqueries{0};   // valid DNS queries received via unknown TLS version
 699   std::atomic<double> tcpAvgQueriesPerConnection{0.0};
 700   /* in ms */
 701   std::atomic<double> tcpAvgConnectionDuration{0.0};
 702   int udpFD{-1};
 703   int tcpFD{-1};
 704   int fastOpenQueueSize{0};
 705   bool muted{false};
 706   bool tcp;
 707   bool reuseport;
 708   bool ready{false};
 709
 710   int getSocket() const
 711   {
 712     return udpFD != -1 ? udpFD : tcpFD;
 713   }
 714
 715   bool isUDP() const
 716   {
 717     return udpFD != -1;
 718   }
 719
 720   bool isTCP() const
 721   {
 722     return udpFD == -1;
 723   }
 724
 725   bool hasTLS() const
 726   {
 727     return tlsFrontend != nullptr || dohFrontend != nullptr;
 728   }
 729
 730   std::string getType() const
 731   {
 732     std::string result = udpFD != -1 ? "UDP" : "TCP";
 733
 734     if (dohFrontend) {
 735       result += " (DNS over HTTPS)";
 736     }
 737     else if (tlsFrontend) {
 738       result += " (DNS over TLS)";
 739     }
 740     else if (dnscryptCtx) {
 741       result += " (DNSCrypt)";
 742     }
 743
 744     return result;
 745   }
 746
 747 #ifdef HAVE_EBPF
 748   shared_ptr<BPFFilter> d_filter;
 749
 750   void detachFilter()
 751   {
 752     if (d_filter) {
 753       d_filter->removeSocket(getSocket());
 754       d_filter = nullptr;
 755     }
 756   }
 757
 758   void attachFilter(shared_ptr<BPFFilter> bpf)
 759   {
 760     detachFilter();
 761
 762     bpf->addSocket(getSocket());
 763     d_filter = bpf;
 764   }
 765 #endif /* HAVE_EBPF */
 766
 767   void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
 768   {
 769     tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
 770     tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
 771   }
 772 };
 773
 774 class TCPClientCollection {
 775   std::vector<int> d_tcpclientthreads;
 776   std::atomic<uint64_t> d_numthreads{0};
 777   std::atomic<uint64_t> d_pos{0};
 778   std::atomic<uint64_t> d_queued{0};
 779   const uint64_t d_maxthreads{0};
 780   std::mutex d_mutex;
 781   int d_singlePipe[2];
 782   const bool d_useSinglePipe;
 783 public:
 784
 785   TCPClientCollection(size_t maxThreads, bool useSinglePipe=false): d_maxthreads(maxThreads), d_singlePipe{-1,-1}, d_useSinglePipe(useSinglePipe)
 786
 787   {
 788     d_tcpclientthreads.reserve(maxThreads);
 789
 790     if (d_useSinglePipe) {
 791       if (pipe(d_singlePipe) < 0) {
 792         int err = errno;
 793         throw std::runtime_error("Error creating the TCP single communication pipe: " + stringerror(err));
 794       }
 795
 796       if (!setNonBlocking(d_singlePipe[0])) {
 797         int err = errno;
 798         close(d_singlePipe[0]);
 799         close(d_singlePipe[1]);
 800         throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
 801       }
 802
 803       if (!setNonBlocking(d_singlePipe[1])) {
 804         int err = errno;
 805         close(d_singlePipe[0]);
 806         close(d_singlePipe[1]);
 807         throw std::runtime_error("Error setting the TCP single communication pipe non-blocking: " + stringerror(err));
 808       }
 809     }
 810   }
 811   int getThread()
 812   {
 813     uint64_t pos = d_pos++;
 814     ++d_queued;
 815     return d_tcpclientthreads[pos % d_numthreads];
 816   }
 817   bool hasReachedMaxThreads() const
 818   {
 819     return d_numthreads >= d_maxthreads;
 820   }
 821   uint64_t getThreadsCount() const
 822   {
 823     return d_numthreads;
 824   }
 825   uint64_t getQueuedCount() const
 826   {
 827     return d_queued;
 828   }
 829   void decrementQueuedCount()
 830   {
 831     --d_queued;
 832   }
 833   void addTCPClientThread();
 834 };
 835
 836 extern std::unique_ptr<TCPClientCollection> g_tcpclientthreads;
 837
 838 struct DownstreamState
 839 {
 840    typedef std::function<std::tuple<DNSName, uint16_t, uint16_t>(const DNSName&, uint16_t, uint16_t, dnsheader*)> checkfunc_t;
 841
 842   DownstreamState(const ComboAddress& remote_, const ComboAddress& sourceAddr_, unsigned int sourceItf, const std::string& sourceItfName, size_t numberOfSockets, bool connect);
 843   DownstreamState(const ComboAddress& remote_): DownstreamState(remote_, ComboAddress(), 0, std::string(), 1, true) {}
 844   ~DownstreamState()
 845   {
 846     for (auto& fd : sockets) {
 847       if (fd >= 0) {
 848         close(fd);
 849         fd = -1;
 850       }
 851     }
 852     pthread_rwlock_destroy(&d_lock);
 853   }
 854   boost::uuids::uuid id;
 855   std::set<unsigned int> hashes;
 856   mutable pthread_rwlock_t d_lock;
 857   std::vector<int> sockets;
 858   const std::string sourceItfName;
 859   std::mutex socketsLock;
 860   std::mutex connectLock;
 861   std::unique_ptr<FDMultiplexer> mplexer{nullptr};
 862   std::thread tid;
 863   const ComboAddress remote;
 864   QPSLimiter qps;
 865   vector<IDState> idStates;
 866   const ComboAddress sourceAddr;
 867   checkfunc_t checkFunction;
 868   DNSName checkName{"a.root-servers.net."};
 869   QType checkType{QType::A};
 870   uint16_t checkClass{QClass::IN};
 871   std::atomic<uint64_t> idOffset{0};
 872   std::atomic<uint64_t> sendErrors{0};
 873   std::atomic<uint64_t> outstanding{0};
 874   std::atomic<uint64_t> reuseds{0};
 875   std::atomic<uint64_t> queries{0};
 876   std::atomic<uint64_t> responses{0};
 877   struct {
 878     std::atomic<uint64_t> sendErrors{0};
 879     std::atomic<uint64_t> reuseds{0};
 880     std::atomic<uint64_t> queries{0};
 881   } prev;
 882   std::atomic<uint64_t> tcpDiedSendingQuery{0};
 883   std::atomic<uint64_t> tcpDiedReadingResponse{0};
 884   std::atomic<uint64_t> tcpGaveUp{0};
 885   std::atomic<uint64_t> tcpReadTimeouts{0};
 886   std::atomic<uint64_t> tcpWriteTimeouts{0};
 887   std::atomic<uint64_t> tcpCurrentConnections{0};
 888   std::atomic<double> tcpAvgQueriesPerConnection{0.0};
 889   /* in ms */
 890   std::atomic<double> tcpAvgConnectionDuration{0.0};
 891   string name;
 892   size_t socketsOffset{0};
 893   double queryLoad{0.0};
 894   double dropRate{0.0};
 895   double latencyUsec{0.0};
 896   int order{1};
 897   int weight{1};
 898   int tcpConnectTimeout{5};
 899   int tcpRecvTimeout{30};
 900   int tcpSendTimeout{30};
 901   unsigned int checkInterval{1};
 902   unsigned int lastCheck{0};
 903   const unsigned int sourceItf{0};
 904   uint16_t retries{5};
 905   uint16_t xpfRRCode{0};
 906   uint16_t checkTimeout{1000}; /* in milliseconds */
 907   uint8_t currentCheckFailures{0};
 908   uint8_t consecutiveSuccessfulChecks{0};
 909   uint8_t maxCheckFailures{1};
 910   uint8_t minRiseSuccesses{1};
 911   StopWatch sw;
 912   set<string> pools;
 913   enum class Availability { Up, Down, Auto} availability{Availability::Auto};
 914   bool mustResolve{false};
 915   bool upStatus{false};
 916   bool useECS{false};
 917   bool setCD{false};
 918   bool disableZeroScope{false};
 919   std::atomic<bool> connected{false};
 920   std::atomic_flag threadStarted;
 921   bool tcpFastOpen{false};
 922   bool ipBindAddrNoPort{true};
 923
 924   bool isUp() const
 925   {
 926     if(availability == Availability::Down)
 927       return false;
 928     if(availability == Availability::Up)
 929       return true;
 930     return upStatus;
 931   }
 932   void setUp() { availability = Availability::Up; }
 933   void setDown() { availability = Availability::Down; }
 934   void setAuto() { availability = Availability::Auto; }
 935   string getName() const {
 936     if (name.empty()) {
 937       return remote.toStringWithPort();
 938     }
 939     return name;
 940   }
 941   string getNameWithAddr() const {
 942     if (name.empty()) {
 943       return remote.toStringWithPort();
 944     }
 945     return name + " (" + remote.toStringWithPort()+ ")";
 946   }
 947   string getStatus() const
 948   {
 949     string status;
 950     if(availability == DownstreamState::Availability::Up)
 951       status = "UP";
 952     else if(availability == DownstreamState::Availability::Down)
 953       status = "DOWN";
 954     else
 955       status = (upStatus ? "up" : "down");
 956     return status;
 957   }
 958   bool reconnect();
 959   void hash();
 960   void setId(const boost::uuids::uuid& newId);
 961   void setWeight(int newWeight);
 962
 963   void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
 964   {
 965     tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
 966     tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
 967   }
 968 };
 969 using servers_t =vector<std::shared_ptr<DownstreamState>>;
 970
 971 template <class T> using NumberedVector = std::vector<std::pair<unsigned int, T> >;
 972
 973 void responderThread(std::shared_ptr<DownstreamState> state);
 974 extern std::mutex g_luamutex;
 975 extern LuaContext g_lua;
 976 extern std::string g_outputBuffer; // locking for this is ok, as locked by g_luamutex
 977
 978 class DNSRule
 979 {
 980 public:
 981   virtual ~DNSRule ()
 982   {
 983   }
 984   virtual bool matches(const DNSQuestion* dq) const =0;
 985   virtual string toString() const = 0;
 986   mutable std::atomic<uint64_t> d_matches{0};
 987 };
 988
 989 using NumberedServerVector = NumberedVector<shared_ptr<DownstreamState>>;
 990 typedef std::function<shared_ptr<DownstreamState>(const NumberedServerVector& servers, const DNSQuestion*)> policyfunc_t;
 991
 992 struct ServerPolicy
 993 {
 994   string name;
 995   policyfunc_t policy;
 996   bool isLua;
 997   std::string toString() const {
 998     return string("ServerPolicy") + (isLua ? " (Lua)" : "") + " \"" + name + "\"";
 999   }
1000 };
1001
1002 struct ServerPool
1003 {
1004   ServerPool()
1005   {
1006     pthread_rwlock_init(&d_lock, nullptr);
1007   }
1008   ~ServerPool()
1009   {
1010     pthread_rwlock_destroy(&d_lock);
1011   }
1012
1013   const std::shared_ptr<DNSDistPacketCache> getCache() const { return packetCache; };
1014
1015   bool getECS() const
1016   {
1017     return d_useECS;
1018   }
1019
1020   void setECS(bool useECS)
1021   {
1022     d_useECS = useECS;
1023   }
1024
1025   std::shared_ptr<DNSDistPacketCache> packetCache{nullptr};
1026   std::shared_ptr<ServerPolicy> policy{nullptr};
1027
1028   size_t countServers(bool upOnly)
1029   {
1030     size_t count = 0;
1031     ReadLock rl(&d_lock);
1032     for (const auto& server : d_servers) {
1033       if (!upOnly || std::get<1>(server)->isUp() ) {
1034         count++;
1035       }
1036     }
1037     return count;
1038   }
1039
1040   NumberedVector<shared_ptr<DownstreamState>> getServers()
1041   {
1042     NumberedVector<shared_ptr<DownstreamState>> result;
1043     {
1044       ReadLock rl(&d_lock);
1045       result = d_servers;
1046     }
1047     return result;
1048   }
1049
1050   void addServer(shared_ptr<DownstreamState>& server)
1051   {
1052     WriteLock wl(&d_lock);
1053     unsigned int count = (unsigned int) d_servers.size();
1054     d_servers.push_back(make_pair(++count, server));
1055     /* we need to reorder based on the server 'order' */
1056     std::stable_sort(d_servers.begin(), d_servers.end(), [](const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& a, const std::pair<unsigned int,std::shared_ptr<DownstreamState> >& b) {
1057       return a.second->order < b.second->order;
1058     });
1059     /* and now we need to renumber for Lua (custom policies) */
1060     size_t idx = 1;
1061     for (auto& serv : d_servers) {
1062       serv.first = idx++;
1063     }
1064   }
1065
1066   void removeServer(shared_ptr<DownstreamState>& server)
1067   {
1068     WriteLock wl(&d_lock);
1069     size_t idx = 1;
1070     bool found = false;
1071     for (auto it = d_servers.begin(); it != d_servers.end();) {
1072       if (found) {
1073         /* we need to renumber the servers placed
1074            after the removed one, for Lua (custom policies) */
1075         it->first = idx++;
1076         it++;
1077       }
1078       else if (it->second == server) {
1079         it = d_servers.erase(it);
1080         found = true;
1081       } else {
1082         idx++;
1083         it++;
1084       }
1085     }
1086   }
1087
1088 private:
1089   NumberedVector<shared_ptr<DownstreamState>> d_servers;
1090   pthread_rwlock_t d_lock;
1091   bool d_useECS{false};
1092 };
1093 using pools_t=map<std::string,std::shared_ptr<ServerPool>>;
1094 void setPoolPolicy(pools_t& pools, const string& poolName, std::shared_ptr<ServerPolicy> policy);
1095 void addServerToPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1096 void removeServerFromPool(pools_t& pools, const string& poolName, std::shared_ptr<DownstreamState> server);
1097
/* Settings of one Carbon export target. */
struct CarbonConfig
{
  ComboAddress server;
  /* the three strings below are presumably combined to build the names
     of the exported metrics -- TODO confirm against carbonDumpThread() */
  std::string namespace_name;
  std::string ourname;
  std::string instance_name;
  /* NOTE(review): the unit is not visible from here, presumably seconds -- confirm */
  unsigned int interval;
};
1106
/* Flags of the EDNS header; DO is the most significant bit of a 16-bit value. */
enum ednsHeaderFlags {
  EDNS_HEADER_FLAG_NONE = 0x0000,
  EDNS_HEADER_FLAG_DO = 0x8000
};
1111
/* Ties a rule to an action, along with the identifier
   and the creation order of that pairing. */
struct DNSDistRuleAction
{
  std::shared_ptr<DNSRule> d_rule;
  std::shared_ptr<DNSAction> d_action;
  boost::uuids::uuid d_id;
  uint64_t d_creationOrder;
};
1119
/* Same layout as DNSDistRuleAction, except that the action
   is a DNSResponseAction. */
struct DNSDistResponseRuleAction
{
  std::shared_ptr<DNSRule> d_rule;
  std::shared_ptr<DNSResponseAction> d_action;
  boost::uuids::uuid d_id;
  uint64_t d_creationOrder;
};
1127
1128 extern GlobalStateHolder<SuffixMatchTree<DynBlock>> g_dynblockSMT;
1129 extern DNSAction::Action g_dynBlockAction;
1130
1131 extern GlobalStateHolder<vector<CarbonConfig> > g_carbon;
1132 extern GlobalStateHolder<ServerPolicy> g_policy;
1133 extern GlobalStateHolder<servers_t> g_dstates;
1134 extern GlobalStateHolder<pools_t> g_pools;
1135 extern GlobalStateHolder<vector<DNSDistRuleAction> > g_rulactions;
1136 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_resprulactions;
1137 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_cachehitresprulactions;
1138 extern GlobalStateHolder<vector<DNSDistResponseRuleAction> > g_selfansweredresprulactions;
1139 extern GlobalStateHolder<NetmaskGroup> g_ACL;
1140
1141 extern ComboAddress g_serverControl; // not changed during runtime
1142
1143 extern std::vector<std::tuple<ComboAddress, bool, bool, int, std::string, std::set<int>>> g_locals; // not changed at runtime (we hope XXX)
1144 extern std::vector<shared_ptr<TLSFrontend>> g_tlslocals;
1145 extern std::vector<shared_ptr<DOHFrontend>> g_dohlocals;
1146 extern std::vector<std::unique_ptr<ClientState>> g_frontends;
1147 extern bool g_truncateTC;
1148 extern bool g_fixupCase;
1149 extern int g_tcpRecvTimeout;
1150 extern int g_tcpSendTimeout;
1151 extern int g_udpTimeout;
1152 extern uint16_t g_maxOutstanding;
1153 extern std::atomic<bool> g_configurationDone;
1154 extern uint64_t g_maxTCPClientThreads;
1155 extern uint64_t g_maxTCPQueuedConnections;
1156 extern size_t g_maxTCPQueriesPerConn;
1157 extern size_t g_maxTCPConnectionDuration;
1158 extern size_t g_maxTCPConnectionsPerClient;
1159 extern std::atomic<uint16_t> g_cacheCleaningDelay;
1160 extern std::atomic<uint16_t> g_cacheCleaningPercentage;
1161 extern uint32_t g_staleCacheEntriesTTL;
1162 extern bool g_apiReadWrite;
1163 extern std::string g_apiConfigDirectory;
1164 extern bool g_servFailOnNoPolicy;
1165 extern uint32_t g_hashperturb;
1166 extern bool g_useTCPSinglePipe;
1167 extern uint16_t g_downstreamTCPCleanupInterval;
1168 extern size_t g_udpVectorSize;
1169 extern bool g_preserveTrailingData;
1170 extern bool g_allowEmptyResponse;
1171 extern bool g_roundrobinFailOnNoServer;
1172 extern double g_consistentHashBalancingFactor;
1173
1174 #ifdef HAVE_EBPF
1175 extern shared_ptr<BPFFilter> g_defaultBPFFilter;
1176 extern std::vector<std::shared_ptr<DynBPFFilter> > g_dynBPFFilters;
1177 #endif /* HAVE_EBPF */
1178
1179 struct LocalHolders
1180 {
1181   LocalHolders(): acl(g_ACL.getLocal()), policy(g_policy.getLocal()), rulactions(g_rulactions.getLocal()), cacheHitRespRulactions(g_cachehitresprulactions.getLocal()), selfAnsweredRespRulactions(g_selfansweredresprulactions.getLocal()), servers(g_dstates.getLocal()), dynNMGBlock(g_dynblockNMG.getLocal()), dynSMTBlock(g_dynblockSMT.getLocal()), pools(g_pools.getLocal())
1182   {
1183   }
1184
1185   LocalStateHolder<NetmaskGroup> acl;
1186   LocalStateHolder<ServerPolicy> policy;
1187   LocalStateHolder<vector<DNSDistRuleAction> > rulactions;
1188   LocalStateHolder<vector<DNSDistResponseRuleAction> > cacheHitRespRulactions;
1189   LocalStateHolder<vector<DNSDistResponseRuleAction> > selfAnsweredRespRulactions;
1190   LocalStateHolder<servers_t> servers;
1191   LocalStateHolder<NetmaskTree<DynBlock> > dynNMGBlock;
1192   LocalStateHolder<SuffixMatchTree<DynBlock> > dynSMTBlock;
1193   LocalStateHolder<pools_t> pools;
1194 };
1195
1196 struct dnsheader;
1197
1198 void controlThread(int fd, ComboAddress local);
1199 std::shared_ptr<ServerPool> getPool(const pools_t& pools, const std::string& poolName);
1200 std::shared_ptr<ServerPool> createPoolIfNotExists(pools_t& pools, const string& poolName);
1201 NumberedServerVector getDownstreamCandidates(const pools_t& pools, const std::string& poolName);
1202
1203 std::shared_ptr<DownstreamState> firstAvailable(const NumberedServerVector& servers, const DNSQuestion* dq);
1204
1205 std::shared_ptr<DownstreamState> leastOutstanding(const NumberedServerVector& servers, const DNSQuestion* dq);
1206 std::shared_ptr<DownstreamState> wrandom(const NumberedServerVector& servers, const DNSQuestion* dq);
1207 std::shared_ptr<DownstreamState> whashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1208 std::shared_ptr<DownstreamState> chashed(const NumberedServerVector& servers, const DNSQuestion* dq);
1209 std::shared_ptr<DownstreamState> roundrobin(const NumberedServerVector& servers, const DNSQuestion* dq);
1210
/* Settings of the built-in web server. */
struct WebserverConfig
{
  std::string password;
  std::string apiKey;
  /* optional: may hold no map at all */
  boost::optional<std::map<std::string, std::string> > customHeaders;
  /* NOTE(review): presumably protects the fields above -- confirm against
     the code reading and writing them */
  std::mutex lock;
};
1218
1219 void setWebserverAPIKey(const boost::optional<std::string> apiKey);
1220 void setWebserverPassword(const std::string& password);
1221 void setWebserverCustomHeaders(const boost::optional<std::map<std::string, std::string> > customHeaders);
1222
1223 void dnsdistWebserverThread(int sock, const ComboAddress& local);
1224 void tcpAcceptorThread(void* p);
1225 #ifdef HAVE_DNS_OVER_HTTPS
1226 void dohThread(ClientState* cs);
1227 #endif /* HAVE_DNS_OVER_HTTPS */
1228
1229 void setLuaNoSideEffect(); // if nothing has been declared, set that there are no side effects
1230 void setLuaSideEffect();   // set to report a side effect, cancelling all _no_ side effect calls
1231 bool getLuaNoSideEffect(); // set if there were only explicit declarations of _no_ side effect
1232 void resetLuaSideEffect(); // reset to indeterminate state
1233
1234 bool responseContentMatches(const char* response, const uint16_t responseLen, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const ComboAddress& remote, unsigned int& consumed);
1235 bool processResponse(char** response, uint16_t* responseLen, size_t* responseSize, LocalStateHolder<vector<DNSDistResponseRuleAction> >& localRespRulactions, DNSResponse& dr, size_t addRoom, std::vector<uint8_t>& rewrittenResponse, bool muted);
1236 bool processRulesResult(const DNSAction::Action& action, DNSQuestion& dq, std::string& ruleresult, bool& drop);
1237
1238 bool checkQueryHeaders(const struct dnsheader* dh);
1239
1240 extern std::vector<std::shared_ptr<DNSCryptContext>> g_dnsCryptLocals;
1241 int handleDNSCryptQuery(char* packet, uint16_t len, std::shared_ptr<DNSCryptQuery> query, uint16_t* decryptedQueryLen, bool tcp, time_t now, std::vector<uint8_t>& response);
1242 boost::optional<std::vector<uint8_t>> checkDNSCryptQuery(const ClientState& cs, const char* query, uint16_t& len, std::shared_ptr<DNSCryptQuery>& dnsCryptQuery, time_t now, bool tcp);
1243
1244 bool addXPF(DNSQuestion& dq, uint16_t optionCode);
1245
1246 uint16_t getRandomDNSID();
1247
1248 #include "dnsdist-snmp.hh"
1249
1250 extern bool g_snmpEnabled;
1251 extern bool g_snmpTrapsEnabled;
1252 extern DNSDistSNMPAgent* g_snmpAgent;
1253 extern bool g_addEDNSToSelfGeneratedResponses;
1254
1255 extern std::set<std::string> g_capabilitiesToRetain;
/* size limits, in bytes */
// don't accept UDP queries larger than this value
static constexpr uint16_t s_udpIncomingBufferSize = 1500;
// don't cache responses larger than this value
static constexpr size_t s_maxPacketCacheEntrySize = 4096;
1258
1259 enum class ProcessQueryResult { Drop, SendAnswer, PassToBackend };
1260 ProcessQueryResult processQuery(DNSQuestion& dq, ClientState& cs, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
1261
1262 DNSResponse makeDNSResponseFromIDState(IDState& ids, struct dnsheader* dh, size_t bufferSize, uint16_t responseLen, bool isTCP);
1263 void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname);
1264
1265 int pickBackendSocketForSending(std::shared_ptr<DownstreamState>& state);
1266 ssize_t udpClientSendRequestToBackend(const std::shared_ptr<DownstreamState>& ss, const int sd, const char* request, const size_t requestLen, bool healthCheck=false);