]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MEDIUM: resolvers: Properly cache do-resolv resolution
authorChristopher Faulet <cfaulet@haproxy.com>
Thu, 11 Sep 2025 08:34:46 +0000 (10:34 +0200)
committerChristopher Faulet <cfaulet@haproxy.com>
Thu, 11 Sep 2025 13:46:45 +0000 (15:46 +0200)
As stated by the documentation, when a do-resolv resolution is performed,
the result should be cached for <hold.valid> milliseconds. However, the only
way to cache the result is to always have a requester. When the last
requester is unlink from the resolution, the resolution is released. So, for
a do-resolv resolution, it means it could only work by chance if the same
FQDN is requested enough to always have at least two streams waiting for the
resolution. And because in that case, the cached result is used, it means
the traffic must be quite high.

In fact, a good approach to fix the issue is to keep orphan resolutions to
be able cache the result and only release them after hold.valid milliseconds
after the last real resolution. The resolver's task already releases orphan
resolutions. So we only need to check the expiration date and take care to
not release the resolution when the last stream is unlink from it.

This patch should be backported to all stable versions. We can start to
backport it as far as 3.1 and then wait a bit.

src/resolvers.c

index 8471c7344303427571edbcac2786b5fa6bb46edb..7753422a9a9798a672ed1018ad5985a29039b6f6 100644 (file)
@@ -370,7 +370,7 @@ static inline uint16_t resolv_rnd16(void)
 
 static inline int resolv_resolution_timeout(struct resolv_resolution *res)
 {
-       return res->resolvers->timeout.resolve;
+       return (!LIST_ISEMPTY(&res->requesters) ? res->resolvers->timeout.resolve : res->resolvers->hold.valid);
 }
 
 /* Updates a resolvers' task timeout for next wake up and queue it */
@@ -2205,7 +2205,19 @@ static void _resolv_unlink_resolution(struct resolv_requester *requester)
        if (!LIST_ISEMPTY(&res->requesters))
                req = LIST_NEXT(&res->requesters, struct resolv_requester *, list);
        else {
-               abort_resolution(res);
+               /* If the last requester was a stream and the resolution was a
+                * success, keep it to use it as a cache for <hold.valid>
+                * milliseconds.
+                */
+               if (obj_type(requester->owner) != OBJ_TYPE_STREAM ||
+                   res->status != RSLV_STATUS_VALID ||
+                   res->resolvers->hold.valid == 0)
+                       abort_resolution(res);
+               else {
+                       if (!tick_isset(res->last_resolution))
+                               res->last_resolution = now_ms;
+                       resolv_update_resolvers_timeout(res->resolvers);
+               }
                return;
        }
 
@@ -2549,7 +2561,12 @@ struct task *process_resolvers(struct task *t, void *context, unsigned int state
                }
 
                if (LIST_ISEMPTY(&res->requesters)) {
-                       abort_resolution(res);
+                       /* Abort inactive resolution only if expired */
+                       exp = tick_add(res->last_resolution, resolvers->hold.valid);
+                       if (!tick_isset(res->last_resolution) || tick_is_expired(exp, now_ms)) {
+                               abort_resolution(res);
+                               res = resback;
+                       }
                        continue;
                }