git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MINOR: dns: wrong resolution interval leads to 100% CPU
author Baptiste Assmann <bassmann@haproxy.com>
Mon, 21 Aug 2017 11:21:48 +0000 (13:21 +0200)
committer Willy Tarreau <w@1wt.eu>
Tue, 22 Aug 2017 09:40:00 +0000 (11:40 +0200)
Since the DNS layer split and the use of the obj_type structure, we did not
properly update the code used to compute the interval between 2
resolutions.
A nasty loop was then created when:
- resolver's hold.valid is shorter than servers' check.inter
- a valid response is available in the DNS cache

A task is woken up for a server's resolution. The server picks up the IP
from the cache and returns without updating the 'last update' timestamp of
the resolution (which is normal...). Then the task is woken up again for
the same server.
The fix now properly computes the interval between 2 resolutions, so
the cache is used while its data is fresh and a new resolution is
triggered only if the data is not fresh enough.

src/dns.c

index 421beab2a5807b660623c1bfb2dfd8810cd59b91..2cf1ec954aea2a0b4e14e71e33cd5fdc9efabc8e 100644 (file)
--- a/src/dns.c
+++ b/src/dns.c
@@ -183,7 +183,7 @@ int dns_trigger_resolution(struct dns_resolution *resolution)
 {
        struct dns_requester *requester = NULL, *tmprequester;
        struct dns_resolvers *resolvers = NULL;
-       int inter;
+       int inter, valid_period;
 
        /* process the element of the wait queue */
        list_for_each_entry_safe(requester, tmprequester, &resolution->requester.wait, list) {
@@ -191,11 +191,11 @@ int dns_trigger_resolution(struct dns_resolution *resolution)
 
                switch (obj_type(requester->requester)) {
                        case OBJ_TYPE_SERVER:
-                               inter = objt_server(requester->requester)->check.inter;
+                               valid_period = objt_server(requester->requester)->check.inter;
                                resolvers = objt_server(requester->requester)->resolvers;
                                break;
                        case OBJ_TYPE_SRVRQ:
-                               inter = objt_dns_srvrq(requester->requester)->inter;
+                               valid_period = objt_dns_srvrq(requester->requester)->inter;
                                resolvers = objt_dns_srvrq(requester->requester)->resolvers;
                                break;
                        case OBJ_TYPE_NONE:
@@ -203,6 +203,11 @@ int dns_trigger_resolution(struct dns_resolution *resolution)
                                return -1;
                }
 
+               if (resolvers->hold.valid < valid_period)
+                       inter = resolvers->hold.valid;
+               else
+                       inter = valid_period;
+
                /* if data is fresh enough, let's use it */
                if (!tick_is_expired(tick_add(resolution->last_resolution, inter), now_ms)) {
                        /* we only use cache if the response there is valid.