*/
#include "dnsdist.hh"
#include "dnsdist-healthchecks.hh"
+#include "dnsdist-prometheus.hh"
#include "sstuff.hh"
#include "ext/json11/json11.hpp"
bool g_apiReadWrite{false};
WebserverConfig g_webserverConfig;
std::string g_apiConfigDirectory;
+// File-local accessor used to look up the Prometheus metric definitions below.
+static const MetricDefinitionStorage s_metricDefinitions;
+
+// Prometheus type and help text of every exported statistic, keyed by the
+// statistic name. A name that has no entry here makes getMetricDetails()
+// return false.
+const std::map<std::string, MetricDefinition> MetricDefinitionStorage::metrics{
+ { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
+ { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
+ { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
+ { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
+ { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
+ { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
+ { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
+ { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
+ { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
+ { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
+ // Rule-generated answers are returned to the client, like the two entries above
+ { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers returned because of a rule")},
+ { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
+ { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
+ { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
+ { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
+ { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
+ { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
+ { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
+ { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
+ { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
+ { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
+ { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
+ { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
+ { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
+ { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
+ { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
+ { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
+ { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
+ { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
+ { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
+ { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
+ { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
+ { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
+ { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was not found in the cache")},
+ { "cpu-iowait", MetricDefinition(PrometheusMetricType::counter, "Time waiting for I/O to complete by the whole system")},
+ { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
+ { "cpu-steal", MetricDefinition(PrometheusMetricType::counter, "Stolen time, which is the time spent by the whole system in other operating systems when running in a virtualized environment")},
+ { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
+ { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
+ { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
+ { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
+ { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
+ { "udp-in-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp InErrors") },
+ { "udp-noport-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp NoPorts") },
+ { "udp-recvbuf-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp RcvbufErrors") },
+ { "udp-sndbuf-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp SndbufErrors") },
+};
static bool apiWriteConfigFile(const string& filebasename, const string& content)
{
}
MetricDefinition metricDetails;
- if (!g_metricDefinitions.getMetricDetails(metricName, metricDetails)) {
+ if (!s_metricDefinitions.getMetricDetails(metricName, metricDetails)) {
vinfolog("Do not have metric details for %s", metricName);
continue;
}
- std::string prometheusTypeName = g_metricDefinitions.getPrometheusStringMetricType(metricDetails.prometheusType);
+ std::string prometheusTypeName = s_metricDefinitions.getPrometheusStringMetricType(metricDetails.prometheusType);
if (prometheusTypeName == "") {
vinfolog("Unknown Prometheus type for %s", metricName);
bool g_verbose;
struct DNSDistStats g_stats;
-MetricDefinitionStorage g_metricDefinitions;
uint16_t g_maxOutstanding{std::numeric_limits<uint16_t>::max()};
uint32_t g_staleCacheEntriesTTL{0};
{"uptime", uptimeOfProcess},
{"real-memory-usage", getRealMemoryUsage},
{"special-memory-usage", getSpecialMemoryUsage},
+ {"udp-in-errors", boost::bind(udpErrorStats, "udp-in-errors")},
+ {"udp-noport-errors", boost::bind(udpErrorStats, "udp-noport-errors")},
+ {"udp-recvbuf-errors", boost::bind(udpErrorStats, "udp-recvbuf-errors")},
+ {"udp-sndbuf-errors", boost::bind(udpErrorStats, "udp-sndbuf-errors")},
{"noncompliant-queries", &nonCompliantQueries},
{"noncompliant-responses", &nonCompliantResponses},
{"rdqueries", &rdQueries},
{"empty-queries", &emptyQueries},
{"cache-hits", &cacheHits},
{"cache-misses", &cacheMisses},
- {"cpu-user-msec", getCPUTimeUser},
+ {"cpu-iowait", getCPUIOWait},
+ {"cpu-steal", getCPUSteal},
{"cpu-sys-msec", getCPUTimeSystem},
+ {"cpu-user-msec", getCPUTimeUser},
{"fd-usage", getOpenFileDescriptors},
{"dyn-blocked", &dynBlocked},
{"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }},
};
};
-// Metric types for Prometheus
-enum class PrometheusMetricType: int {
- counter = 1,
- gauge = 2
-};
-
-// Keeps additional information about metrics
-struct MetricDefinition {
- MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
- }
-
- MetricDefinition() = default;
-
- // Metric description
- std::string description;
- // Metric type for Prometheus
- PrometheusMetricType prometheusType;
-};
-
-struct MetricDefinitionStorage {
- // Return metric definition by name
- bool getMetricDetails(std::string metricName, MetricDefinition& metric) {
- auto metricDetailsIter = metrics.find(metricName);
-
- if (metricDetailsIter == metrics.end()) {
- return false;
- }
-
- metric = metricDetailsIter->second;
- return true;
- };
-
- // Return string representation of Prometheus metric type
- std::string getPrometheusStringMetricType(PrometheusMetricType metricType) {
- switch (metricType) {
- case PrometheusMetricType::counter:
- return "counter";
- break;
- case PrometheusMetricType::gauge:
- return "gauge";
- break;
- default:
- return "";
- break;
- }
- };
-
- std::map<std::string, MetricDefinition> metrics = {
- { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
- { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
- { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
- { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
- { "frontend-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")},
- { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")},
- { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")},
- { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")},
- { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")},
- { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")},
- { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")},
- { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")},
- { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")},
- { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")},
- { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")},
- { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")},
- { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")},
- { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")},
- { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")},
- { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 50-100 ms")},
- { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")},
- { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")},
- { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")},
- { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")},
- { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")},
- { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")},
- { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")},
- { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")},
- { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")},
- { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")},
- { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")},
- { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")},
- { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")},
- { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")},
- { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the user state")},
- { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")},
- { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")},
- { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")},
- { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") },
- { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") },
- };
-};
-
-extern MetricDefinitionStorage g_metricDefinitions;
extern struct DNSDistStats g_stats;
void doLatencyStats(double udiff);
dnsdist-lua-inspection-ffi.cc dnsdist-lua-inspection-ffi.hh \
dnsdist-lua-rules.cc \
dnsdist-lua-vars.cc \
+ dnsdist-prometheus.hh \
dnsdist-protobuf.cc dnsdist-protobuf.hh \
dnsdist-rings.cc dnsdist-rings.hh \
dnsdist-rules.hh \
--- /dev/null
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#pragma once
+
+#include <map>
+#include <string>
+
+// Metric types for Prometheus; getPrometheusStringMetricType() turns them into their textual names
+enum class PrometheusMetricType: int {
+ counter = 1, // rendered as "counter"
+ gauge = 2 // rendered as "gauge"
+};
+
+// Keeps additional information about a metric: its human-readable
+// description and the Prometheus type it is exported as.
+struct MetricDefinition {
+  // Builds a definition from a Prometheus type and a description.
+  MetricDefinition(PrometheusMetricType _prometheusType, const std::string& _description): description(_description), prometheusType(_prometheusType) {
+  }
+
+  // Builds an empty definition: empty description, counter type.
+  MetricDefinition() = default;
+
+  // Metric description
+  std::string description;
+  // Metric type for Prometheus. Initialized in-class so that a
+  // default-constructed definition never holds an indeterminate value.
+  PrometheusMetricType prometheusType{PrometheusMetricType::counter};
+};
+
+// Lookup helpers around the static table of known metric definitions.
+struct MetricDefinitionStorage {
+  // Copies the definition registered under metricName into metric and
+  // returns true. Returns false and leaves metric untouched when the
+  // name is not present in the table.
+  bool getMetricDetails(const std::string& metricName, MetricDefinition& metric) const {
+    const auto found = metrics.find(metricName);
+    const bool known = (found != metrics.end());
+
+    if (known) {
+      metric = found->second;
+    }
+
+    return known;
+  }
+
+  // Returns the textual name of a Prometheus metric type, or an empty
+  // string for a value that is neither counter nor gauge.
+  std::string getPrometheusStringMetricType(PrometheusMetricType metricType) const {
+    if (metricType == PrometheusMetricType::counter) {
+      return "counter";
+    }
+    if (metricType == PrometheusMetricType::gauge) {
+      return "gauge";
+    }
+    return "";
+  }
+
+  // Known metrics keyed by name. Only declared here: as a static data
+  // member it needs a definition in exactly one translation unit.
+  static const std::map<std::string, MetricDefinition> metrics;
+};
------------
Number of times an answer was not found in the :doc:`packet cache <guides/cache>`. Only counted if a packet cache was setup for the selected pool.
+cpu-iowait
+----------
+.. versionadded:: 1.5.0
+
+Time spent waiting for I/O to complete by the whole system.
+
+cpu-steal
+---------
+.. versionadded:: 1.5.0
+
+Stolen time, which is the time spent by the whole system in other operating systems when running in a virtualized environment.
+
cpu-sys-msec
------------
Milliseconds spent by :program:`dnsdist` in the "system" state.
--------------
Number of errors encountered while truncating an answer.
+udp-in-errors
+-------------
+.. versionadded:: 1.5.0
+
+Value of the UDP ``InErrors`` counter, as read from ``/proc/net/snmp``.
+
+udp-noport-errors
+-----------------
+.. versionadded:: 1.5.0
+
+Value of the UDP ``NoPorts`` counter, as read from ``/proc/net/snmp``.
+
+udp-recvbuf-errors
+------------------
+.. versionadded:: 1.5.0
+
+Value of the UDP ``RcvbufErrors`` counter, as read from ``/proc/net/snmp``.
+
+udp-sndbuf-errors
+-----------------
+.. versionadded:: 1.5.0
+
+Value of the UDP ``SndbufErrors`` counter, as read from ``/proc/net/snmp``.
+
uptime
------
Uptime of the dnsdist process, in seconds.
-
'latency-slow', 'latency-sum', 'latency-count', 'latency-avg100', 'latency-avg1000',
'latency-avg10000', 'latency-avg1000000', 'uptime', 'real-memory-usage', 'noncompliant-queries',
'noncompliant-responses', 'rdqueries', 'empty-queries', 'cache-hits',
- 'cache-misses', 'cpu-user-msec', 'cpu-sys-msec', 'fd-usage', 'dyn-blocked',
- 'dyn-block-nmg-size', 'rule-servfail', 'security-status']
+ 'cache-misses', 'cpu-iowait', 'cpu-steal', 'cpu-sys-msec', 'cpu-user-msec', 'fd-usage', 'dyn-blocked',
+ 'dyn-block-nmg-size', 'rule-servfail', 'security-status',
+ 'udp-in-errors', 'udp-noport-errors', 'udp-recvbuf-errors', 'udp-sndbuf-errors']
for key in expected:
self.assertIn(key, values)