From b4448848e54305eab678f67fe8ac904dcf4311f5 Mon Sep 17 00:00:00 2001 From: Otto Date: Tue, 23 Feb 2021 15:46:38 +0100 Subject: [PATCH] Cumulative Prometheus style histograms --- pdns/histogram.hh | 3 +- pdns/pdns_recursor.cc | 1 + pdns/rec_channel.hh | 12 +++++- pdns/rec_channel_rec.cc | 86 ++++++++++++++++++++++++++++++++++++++--- pdns/syncres.cc | 2 + pdns/syncres.hh | 12 ++++-- 6 files changed, 105 insertions(+), 11 deletions(-) diff --git a/pdns/histogram.hh b/pdns/histogram.hh index 4042ce1fb7..039829b057 100644 --- a/pdns/histogram.hh +++ b/pdns/histogram.hh @@ -57,7 +57,7 @@ template class BaseHistogram { public: - BaseHistogram(const std::string& prefix, const std::vector& boundaries) + BaseHistogram(const std::string& prefix, const std::vector& boundaries) : d_name(prefix) { if (!std::is_sorted(boundaries.cbegin(), boundaries.cend())) { throw std::invalid_argument("boundary array must be sorted"); @@ -130,6 +130,7 @@ public: private: std::vector d_buckets; + std::string d_name; }; template diff --git a/pdns/pdns_recursor.cc b/pdns/pdns_recursor.cc index 3053633f77..b827632c3c 100644 --- a/pdns/pdns_recursor.cc +++ b/pdns/pdns_recursor.cc @@ -2317,6 +2317,7 @@ static void startDoResolve(void *p) } g_stats.answers(spentUsec); + g_stats.cumulativeAnswers(spentUsec); double newLat = spentUsec; newLat = min(newLat, g_networkTimeoutMsec * 1000.0); // outliers of several minutes exist.. diff --git a/pdns/rec_channel.hh b/pdns/rec_channel.hh index 287a793832..7b197665e8 100644 --- a/pdns/rec_channel.hh +++ b/pdns/rec_channel.hh @@ -94,7 +94,17 @@ struct StatsMapEntry { std::string d_prometheusName; std::string d_value; }; -typedef std::map StatsMap; + +class SimpleNaturalCompare +{ +private: + static std::pair prefixAndTrailingNum(const std::string& a); +public: + bool operator()(const std::string& a, const std::string& b) const; +}; + +typedef std::map StatsMap; + StatsMap getAllStatsMap(StatComponent component); extern std::mutex g_carbon_config_lock; diff --git a/pdns/rec_channel_rec.cc b/pdns/rec_channel_rec.cc index 1c432c3198..35a3dcc786 100644 --- a/pdns/rec_channel_rec.cc +++ b/pdns/rec_channel_rec.cc @@ -40,11 +40,45 @@ #include "namespaces.hh" #include "rec-taskqueue.hh" +std::pair SimpleNaturalCompare::prefixAndTrailingNum(const std::string& a) +{ + auto i = a.length(); + if (i == 0) { + return make_pair(a, ""); + } + --i; + if (!std::isdigit(a[i])) { + return make_pair(a, ""); + } + while (i > 0) { + if (!std::isdigit(a[i])) { + break; + } + --i; + } + return make_pair(a.substr(0, i + 1), a.substr(i + 1, a.size() - i - 1)); +} + +bool SimpleNaturalCompare::operator()(const std::string& a, const std::string& b) const +{ + auto [aprefix, anum] = prefixAndTrailingNum(a); + auto [bprefix, bnum] = prefixAndTrailingNum(b); + + if (aprefix != bprefix || anum.length() == 0 || bnum.length() == 0) { + return a < b; + } + auto aa = std::stoull(anum); + auto bb = std::stoull(bnum); + return aa < bb; +} + std::mutex g_carbon_config_lock; static map d_get32bitpointers; static map*> d_getatomics; -static map > d_get64bitmembers; +static map> d_get64bitmembers; +static map> d_getmultimembers; + static std::mutex d_dynmetricslock; struct dynmetrics { std::atomic *d_ptr; @@ -77,17 +111,22 @@ void disableStats(StatComponent component, const string& stats) static void addGetStat(const string& name, const uint32_t* place) { - d_get32bitpointers[name]=place; + d_get32bitpointers[name] = place; } static void addGetStat(const string& name, const std::atomic* place) { - d_getatomics[name]=place; + d_getatomics[name] = place; +} + +static void addGetStat(const string& name, std::function f) +{ + d_get64bitmembers[name] = f; } -static void addGetStat(const string& name, std::function f ) +static void addGetStat(const string& name, std::function f) { - d_get64bitmembers[name]=f; + d_getmultimembers[name] = f; } static std::string getPrometheusName(const std::string& arg) @@ -132,7 +171,15 @@ static boost::optional get(const string& name) auto f = rplookup(d_dynmetrics, name); if (f) return f->d_ptr->load(); - + + for(const auto& themultimember : d_getmultimembers) { + const auto items = themultimember.second(); + const auto item = items.find(name); + if (item != items.end()) { + return std::stoull(item->second.d_value); + } + } + return ret; } @@ -163,6 +210,12 @@ StatsMap getAllStatsMap(StatComponent component) } } + for(const auto& themultimember : d_getmultimembers) { + if (blacklistMap.count(themultimember.first) == 0) { + ret.merge(themultimember.second()); + } + } + { std::lock_guard l(d_dynmetricslock); for(const auto& a : d_dynmetrics) { @@ -1058,6 +1111,17 @@ static uint64_t doGetMallocated() return 0; } +static StatsMap toStatsMap(const string& name, const vector& data) +{ + StatsMap entries; + for (const auto& bucket : data) { + std::string pname = getPrometheusName(name) + '{' + "le=\"" + + (bucket.d_boundary == std::numeric_limits::max() ? "+Inf" : std::to_string(bucket.d_boundary)) + "\"}"; + entries.emplace(make_pair(bucket.d_name, StatsMapEntry{pname, std::to_string(bucket.d_count)})); + } + return entries; +} + extern ResponseStats g_rs; static void registerAllStats1() @@ -1300,6 +1364,16 @@ static void registerAllStats1() const std::string name = "ecs-v6-response-bits-" + std::to_string(idx + 1); addGetStat(name, &(SyncRes::s_ecsResponsesBySubnetSize6.at(idx))); } + + addGetStat("cumulativeAnswers-usec-", []() { + return toStatsMap(g_stats.cumulativeAnswers.getName(), g_stats.cumulativeAnswers.getCumulativeBuckets()); + }); + addGetStat("cumulativeAuth4Answers-usec-", []() { + return toStatsMap(g_stats.cumulativeAuth4Answers.getName(), g_stats.cumulativeAuth4Answers.getCumulativeBuckets()); + }); + addGetStat("cumulativeAuth6Answers-usec-", []() { + return toStatsMap(g_stats.cumulativeAuth6Answers.getName(), g_stats.cumulativeAuth6Answers.getCumulativeBuckets()); + }); } void registerAllStats() diff --git a/pdns/syncres.cc b/pdns/syncres.cc index e7b5ea0600..4c34384137 100644 --- a/pdns/syncres.cc +++ b/pdns/syncres.cc @@ -107,8 +107,10 @@ static inline void accountAuthLatency(uint64_t usec, int family) { if (family == AF_INET) { g_stats.auth4Answers(usec); + g_stats.cumulativeAuth4Answers(usec); } else { g_stats.auth6Answers(usec); + g_stats.cumulativeAuth6Answers(usec); } } diff --git a/pdns/syncres.hh b/pdns/syncres.hh index b66e808dd9..bf3f852c30 100644 --- a/pdns/syncres.hh +++ b/pdns/syncres.hh @@ -1007,6 +1007,9 @@ struct RecursorStats pdns::AtomicHistogram auth4Answers; pdns::AtomicHistogram auth6Answers; pdns::AtomicHistogram ourtime; + pdns::AtomicHistogram cumulativeAnswers; + pdns::AtomicHistogram cumulativeAuth4Answers; + pdns::AtomicHistogram cumulativeAuth6Answers; std::atomic avgLatencyUsec; std::atomic avgLatencyOursUsec; std::atomic qcounter; // not increased for unauth packets @@ -1052,9 +1055,12 @@ struct RecursorStats RecursorStats() : answers("answers", { 1000, 10000, 100000, 1000000 }), - auth4Answers("answers", { 1000, 10000, 100000, 1000000 }), - auth6Answers("answers", { 1000, 10000, 100000, 1000000 }), - ourtime("ourtime", { 1000, 2000, 4000, 8000, 16000, 32000 }) + auth4Answers("auth4answers", { 1000, 10000, 100000, 1000000 }), + auth6Answers("auth6answers", { 1000, 10000, 100000, 1000000 }), + ourtime("ourtime", { 1000, 2000, 4000, 8000, 16000, 32000 }), + cumulativeAnswers("cumulAnswers-us", { 100, 200, 400, 800, 1600, 3200, 6400, 12800, 25600, 51200, 102400, 204800, 409600, 819200, 1638400, 3276800, 6553600 }), + cumulativeAuth4Answers("cumulAuth4Answers-us", { 100, 200, 400, 800, 1600, 3200, 6400, 12800, 25600, 51200, 102400, 204800, 409600, 819200, 1638400, 3276800, 6553600 }), + cumulativeAuth6Answers("cumulAuth6Answers-us", { 100, 200, 400, 800, 1600, 3200, 6400, 12800, 25600, 51200, 102400, 204800, 409600, 819200, 1638400, 3276800, 6553600 }) { } }; -- 2.47.2