From d18e30704a2879034914b4afd090d7f66c1fbc0d Mon Sep 17 00:00:00 2001 From: Pavel Odintsov Date: Thu, 30 Aug 2018 16:21:54 +0100 Subject: [PATCH] Added Prometheus stats endpoint. Patch by Kai Storbeck --- pdns/dnsdist-web.cc | 85 +++++++++- pdns/dnsdistdist/docs/guides/webserver.rst | 178 +++++++++++++++++++++ 2 files changed, 262 insertions(+), 1 deletion(-) diff --git a/pdns/dnsdist-web.cc b/pdns/dnsdist-web.cc index 16ff2ff2eb..f1d11693f7 100644 --- a/pdns/dnsdist-web.cc +++ b/pdns/dnsdist-web.cc @@ -128,7 +128,7 @@ static bool isAnAPIRequestAllowedWithWebAuth(const YaHTTP::Request& req) static bool isAStatsRequest(const YaHTTP::Request& req) { - return req.url.path == "/jsonstat"; + return req.url.path == "/jsonstat" || req.url.path == "/prometheus"; } static bool compareAuthorization(const YaHTTP::Request& req, const string &expected_password, const string& expectedApiKey) @@ -386,6 +386,89 @@ static void connectionThread(int sock, ComboAddress remote, string password, str resp.status=404; } } + else if(req.url.path=="/prometheus") { + handleCORS(req, resp); + resp.status=200; + + ostringstream str; + for(const auto& e : g_stats.entries) { + string metricName = "dnsdist_main_" + std::get<0>(e); + boost::replace_all(metricName, "-", "_"); + + // for these we have the help and types encoded in the sources: + str<<"# HELP "<(e)<<"\n"; + str<<"# TYPE "<(e)<<"\n"; + str<(&std::get<1>(e))) + str<<(*val)->load(); + else if (const auto& dval = boost::get(&std::get<1>(e))) + str<<**dval; + else + str<<(*boost::get(&std::get<1>(e)))(std::get<0>(e)); + str<<"\n"; + } + const auto states = g_dstates.getCopy(); + const string statesbase = "dnsdist_main_servers_"; + for(const auto& state : states) { + string serverName = state->name.empty() ? 
(state->remote.toString() + ":" + std::to_string(state->remote.getPort())) : state->getName(); + boost::replace_all(serverName, ".", "_"); + const string label = "{server=\"" + serverName + "\"}"; + str<queries.load() <<"\n"; + str<reuseds.load() << "\n"; + str<latencyUsec/1000.0 << "\n"; + str<sendErrors.load() << "\n"; + str<outstanding.load() << "\n"; + } + for(const auto& front : g_frontends) { + if (front->udpFD == -1 && front->tcpFD == -1) + continue; + + string frontName = front->local.toString() + ":" + std::to_string(front->local.getPort()); + boost::replace_all(frontName, ".", "_"); + string proto = (front->udpFD >= 0 ? "udp" : "tcp"); + str<<"dnsdist_main_frontend_queries{frontend=\""<queries.load() << "\n"; + } + const auto localPools = g_pools.getCopy(); + const string cachebase = "dnsdist_pool_"; + for (const auto& entry : localPools) { + string poolName = entry.first; + boost::replace_all(poolName, ".", "_"); + if (poolName.empty()) { + poolName = "_default_"; + } + const string label = "{pool=\"" + poolName + "\"}"; + const std::shared_ptr pool = entry.second; + str<<"dnsdist_main_pools_servers"<servers.size() <<"\n"; + if (pool->packetCache != nullptr) { + const auto& cache = pool->packetCache; + str<getMaxEntries() << "\n"; + str<getEntriesCount() << "\n"; + str<getHits() << "\n"; + str<getMisses() << "\n"; + str<getDeferredInserts() << "\n"; + str<getDeferredLookups() << "\n"; + str<getLookupCollisions() << "\n"; + str<getInsertCollisions() << "\n"; + str<getTTLTooShorts() << "\n"; + } + } + + { + WriteLock wl(&g_qcount.queryLock); + std::string qname; + const string qnamebase = "dnsdist_querycount_queries"; + for(auto &record: g_qcount.records) { + qname = record.first; + boost::replace_all(qname, ".", "_"); + const string label = "{qname=\"" + qname + "\"}"; + str<`_ format. + + **Example request**: + + .. sourcecode:: http + + GET /prometheus + + **Example response**: + .. 
sourcecode:: http + + HTTP/1.1 200 OK + Transfer-Encoding: chunked + Content-Security-Policy: default-src 'self'; style-src 'self' 'unsafe-inline' + Content-Type: text/plain + X-Content-Type-Options: nosniff + X-Frame-Options: deny + X-Permitted-Cross-Domain-Policies: none + X-Xss-Protection: 1; mode=block + + + # HELP dnsdist_main_responses Number of responses received from backends + # TYPE dnsdist_main_responses counter + dnsdist_main_responses 0 + # HELP dnsdist_main_servfail_responses Number of SERVFAIL answers received from backends + # TYPE dnsdist_main_servfail_responses counter + dnsdist_main_servfail_responses 0 + # HELP dnsdist_main_queries Number of received queries + # TYPE dnsdist_main_queries counter + dnsdist_main_queries 0 + # HELP dnsdist_main_acl_drops Number of packets dropped because of the ACL + # TYPE dnsdist_main_acl_drops counter + dnsdist_main_acl_drops 0 + # HELP dnsdist_main_rule_drop Number of queries dropped because of a rule + # TYPE dnsdist_main_rule_drop counter + dnsdist_main_rule_drop 0 + # HELP dnsdist_main_rule_nxdomain Number of NXDomain answers returned because of a rule + # TYPE dnsdist_main_rule_nxdomain counter + dnsdist_main_rule_nxdomain 0 + # HELP dnsdist_main_rule_refused Number of Refused answers returned because of a rule + # TYPE dnsdist_main_rule_refused counter + dnsdist_main_rule_refused 0 + # HELP dnsdist_main_rule_servfail Number of SERVFAIL answers received because of a rule + # TYPE dnsdist_main_rule_servfail counter + dnsdist_main_rule_servfail 0 + # HELP dnsdist_main_self_answered Number of self-answered responses + # TYPE dnsdist_main_self_answered counter + dnsdist_main_self_answered 0 + # HELP dnsdist_main_downstream_timeouts Number of queries not answered in time by a backend + # TYPE dnsdist_main_downstream_timeouts counter + dnsdist_main_downstream_timeouts 0 + # HELP dnsdist_main_downstream_send_errors Number of errors when sending a query to a backend + # TYPE dnsdist_main_downstream_send_errors 
counter + dnsdist_main_downstream_send_errors 0 + # HELP dnsdist_main_trunc_failures Number of errors encountered while truncating an answer + # TYPE dnsdist_main_trunc_failures counter + dnsdist_main_trunc_failures 0 + # HELP dnsdist_main_no_policy Number of queries dropped because no server was available + # TYPE dnsdist_main_no_policy counter + dnsdist_main_no_policy 0 + # HELP dnsdist_main_latency0_1 Number of queries answered in less than 1ms + # TYPE dnsdist_main_latency0_1 counter + dnsdist_main_latency0_1 0 + # HELP dnsdist_main_latency1_10 Number of queries answered in 1-10 ms + # TYPE dnsdist_main_latency1_10 counter + dnsdist_main_latency1_10 0 + # HELP dnsdist_main_latency10_50 Number of queries answered in 10-50 ms + # TYPE dnsdist_main_latency10_50 counter + dnsdist_main_latency10_50 0 + # HELP dnsdist_main_latency50_100 Number of queries answered in 50-100 ms + # TYPE dnsdist_main_latency50_100 counter + dnsdist_main_latency50_100 0 + # HELP dnsdist_main_latency100_1000 Number of queries answered in 100-1000 ms + # TYPE dnsdist_main_latency100_1000 counter + dnsdist_main_latency100_1000 0 + # HELP dnsdist_main_latency_slow Number of queries answered in more than 1 second + # TYPE dnsdist_main_latency_slow counter + dnsdist_main_latency_slow 0 + # HELP dnsdist_main_latency_avg100 Average response latency in microseconds of the last 100 packets + # TYPE dnsdist_main_latency_avg100 gauge + dnsdist_main_latency_avg100 0 + # HELP dnsdist_main_latency_avg1000 Average response latency in microseconds of the last 1000 packets + # TYPE dnsdist_main_latency_avg1000 gauge + dnsdist_main_latency_avg1000 0 + # HELP dnsdist_main_latency_avg10000 Average response latency in microseconds of the last 10000 packets + # TYPE dnsdist_main_latency_avg10000 gauge + dnsdist_main_latency_avg10000 0 + # HELP dnsdist_main_latency_avg1000000 Average response latency in microseconds of the last 1000000 packets + # TYPE dnsdist_main_latency_avg1000000 gauge + 
dnsdist_main_latency_avg1000000 0 + # HELP dnsdist_main_uptime Uptime of the dnsdist process in seconds + # TYPE dnsdist_main_uptime gauge + dnsdist_main_uptime 42 + # HELP dnsdist_main_real_memory_usage Current memory usage in bytes + # TYPE dnsdist_main_real_memory_usage gauge + dnsdist_main_real_memory_usage 11292672 + # HELP dnsdist_main_noncompliant_queries Number of queries dropped as non-compliant + # TYPE dnsdist_main_noncompliant_queries counter + dnsdist_main_noncompliant_queries 0 + # HELP dnsdist_main_noncompliant_responses Number of answers from a backend dropped as non-compliant + # TYPE dnsdist_main_noncompliant_responses counter + dnsdist_main_noncompliant_responses 0 + # HELP dnsdist_main_rdqueries Number of received queries with the recursion desired bit set + # TYPE dnsdist_main_rdqueries counter + dnsdist_main_rdqueries 0 + # HELP dnsdist_main_empty_queries Number of empty queries received from clients + # TYPE dnsdist_main_empty_queries counter + dnsdist_main_empty_queries 0 + # HELP dnsdist_main_cache_hits Number of times an answer was retrieved from cache + # TYPE dnsdist_main_cache_hits counter + dnsdist_main_cache_hits 0 + # HELP dnsdist_main_cache_misses Number of times an answer not found in the cache + # TYPE dnsdist_main_cache_misses counter + dnsdist_main_cache_misses 0 + # HELP dnsdist_main_cpu_user_msec Milliseconds spent by dnsdist in the user state + # TYPE dnsdist_main_cpu_user_msec counter + dnsdist_main_cpu_user_msec 58 + # HELP dnsdist_main_cpu_sys_msec Milliseconds spent by dnsdist in the system state + # TYPE dnsdist_main_cpu_sys_msec counter + dnsdist_main_cpu_sys_msec 35 + # HELP dnsdist_main_fd_usage Number of currently used file descriptors + # TYPE dnsdist_main_fd_usage gauge + dnsdist_main_fd_usage 18 + # HELP dnsdist_main_dyn_blocked Number of queries dropped because of a dynamic block + # TYPE dnsdist_main_dyn_blocked counter + dnsdist_main_dyn_blocked 0 + # HELP dnsdist_main_dyn_block_nmg_size Number of dynamic 
blocks entries + # TYPE dnsdist_main_dyn_block_nmg_size gauge + dnsdist_main_dyn_block_nmg_size 0 + dnsdist_main_servers_queries{server="9_9_9_9:53"} 0 + dnsdist_main_servers_drops{server="9_9_9_9:53"} 0 + dnsdist_main_servers_latency{server="9_9_9_9:53"} 0 + dnsdist_main_servers_senderrors{server="9_9_9_9:53"} 0 + dnsdist_main_servers_outstanding{server="9_9_9_9:53"} 0 + dnsdist_main_servers_queries{server="8_8_8_8:53"} 0 + dnsdist_main_servers_drops{server="8_8_8_8:53"} 0 + dnsdist_main_servers_latency{server="8_8_8_8:53"} 0 + dnsdist_main_servers_senderrors{server="8_8_8_8:53"} 0 + dnsdist_main_servers_outstanding{server="8_8_8_8:53"} 0 + dnsdist_main_servers_queries{server="::1:53"} 0 + dnsdist_main_servers_drops{server="::1:53"} 0 + dnsdist_main_servers_latency{server="::1:53"} 0 + dnsdist_main_servers_senderrors{server="::1:53"} 0 + dnsdist_main_servers_outstanding{server="::1:53"} 0 + dnsdist_main_servers_queries{server="194_109_6_66:53"} 0 + dnsdist_main_servers_drops{server="194_109_6_66:53"} 0 + dnsdist_main_servers_latency{server="194_109_6_66:53"} 0 + dnsdist_main_servers_senderrors{server="194_109_6_66:53"} 0 + dnsdist_main_servers_outstanding{server="194_109_6_66:53"} 0 + dnsdist_main_frontend_queries{frontend="127_0_0_1:5300",proto="udp"} 0 + dnsdist_main_frontend_queries{frontend="127_0_0_1:5300",proto="tcp"} 0 + dnsdist_main_pools_servers{pool="_default_"} 4 + dnsdist_pool_cache_size{pool="_default_"} 1000 + dnsdist_pool_cache_entries{pool="_default_"} 0 + dnsdist_pool_cache_hits{pool="_default_"} 0 + dnsdist_pool_cache_misses{pool="_default_"} 0 + dnsdist_pool_cache_deferred_inserts{pool="_default_"} 0 + dnsdist_pool_cache_deferred_lookups{pool="_default_"} 0 + dnsdist_pool_cache_lookup_collisions{pool="_default_"} 0 + dnsdist_pool_cache_insert_collisions{pool="_default_"} 0 + dnsdist_pool_cache_ttl_too_shorts{pool="_default_"} 0 + + **Example prometheus configuration**: + + This is just the scrape job description, for details see the prometheus 
documentation. + + .. sourcecode:: yaml + + job_name: dnsdist + scrape_interval: 10s + scrape_timeout: 2s + metrics_path: /prometheus + basic_auth: + username: dontcare + password: yoursecret + + .. http:get:: /api/v1/servers/localhost Get a quick overview of several parameters. -- 2.47.2