git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
manager/metrics/prometheus: fix the answer latency histogram
author Aleš Mrázek <ales.mrazek@nic.cz>
Tue, 19 Aug 2025 13:44:13 +0000 (15:44 +0200)
committer Vladimír Čunát <vladimir.cunat@nic.cz>
Mon, 15 Sep 2025 08:02:11 +0000 (10:02 +0200)
NEWS
python/knot_resolver/manager/metrics/prometheus.py

diff --git a/NEWS b/NEWS
index 6246e7abfab6dc9637dc9add1ac3b46d5d0dbef5..9becaf0b4038b394c01d1025af150d7d7a72ac97 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,7 @@ Improvements
 Bugfixes
 --------
 - /options/query-case-randomization: respect this even on TCP issues (!1732)
+- prometheus metrics: make the latency histogram cumulative (!1731, GH#117)
 
 
 Knot Resolver 6.0.15 (2025-07-17)
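diff --git a/python/knot_resolver/manager/metrics/prometheus.py b/python/knot_resolver/manager/metrics/prometheus.py
index 5e8a130dced9a0cf2b619fe98f5d0c4ce5b463e6..4138b549fb59ef3d3ed9461e17b47aaeae11d36c 100644 (file)
--- a/python/knot_resolver/manager/metrics/prometheus.py
+++ b/python/knot_resolver/manager/metrics/prometheus.py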
@@ -52,12 +52,18 @@ if PROMETHEUS_LIB:
         # response latency histogram
         bucket_names_in_resolver = ("1ms", "10ms", "50ms", "100ms", "250ms", "500ms", "1000ms", "1500ms", "slow")
         bucket_names_in_prometheus = ("0.001", "0.01", "0.05", "0.1", "0.25", "0.5", "1.0", "1.5", "+Inf")
+
+        # add smaller bucket counts
+        def _bucket_count(answer: Dict[str, int], duration: str) -> int:
+            index = bucket_names_in_resolver.index(duration)
+            return sum([int(answer[bucket_names_in_resolver[i]]) for i in range(index + 1)])
+
         yield _histogram(
             "resolver_response_latency",
             "Time it takes to respond to queries in seconds",
             label=("instance_id", sid),
             buckets=[
-                (bnp, metrics["answer"][f"{duration}"])
+                (bnp, _bucket_count(metrics["answer"], duration))
                 for bnp, duration in zip(bucket_names_in_prometheus, bucket_names_in_resolver)
             ],
             sum_value=metrics["answer"]["sum_ms"] / 1_000,