]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
dnsdist: Reset origFD asap to keep the outstanding count correct 4365/head
authorRemi Gacogne <remi.gacogne@powerdns.com>
Thu, 25 Aug 2016 15:15:54 +0000 (17:15 +0200)
committerRemi Gacogne <remi.gacogne@powerdns.com>
Thu, 25 Aug 2016 15:15:54 +0000 (17:15 +0200)
Previously the health check thread waited until we had finished
with the IDState to set `origFD` to -1, but:
* for the UDP client thread, the only difference it makes is that
`outstanding` will not be incremented if `origFD` is not -1,
which is not what we want since we are going to decrement it
* for the UDP responder thread, it actually increases the
likelihood of decrementing `outstanding` twice, once in the
responder threader and once in the health check thread.

This was especially likely to be an issue because the health check
thread used to call `gettime()` and to acquire a mutex before
setting `origFD` to -1.

pdns/dnsdist.cc

index 569f277addce762e4cc2b264a7adde5a8b41da48..f4fcd72f3360a7ef220d27c7419a827ea2fc9bd1 100644 (file)
@@ -1319,9 +1319,20 @@ void* healthChecksThread()
       
       for(IDState& ids  : dss->idStates) { // timeouts
         if(ids.origFD >=0 && ids.age++ > 2) {
+          /* We set origFD to -1 as soon as possible
+             to limit the risk of racing with the
+             responder thread.
+             The UDP client thread only checks origFD to
+             know whether outstanding has to be incremented,
+             so the sooner the better any way since we _will_
+             decrement it.
+          */
+          ids.origFD = -1;
           ids.age = 0;
           dss->reuseds++;
           --dss->outstanding;
+          g_stats.downstreamTimeouts++; // this is an 'actively' discovered timeout
+
           struct timespec ts;
           gettime(&ts);
 
@@ -1331,10 +1342,6 @@ void* healthChecksThread()
 
           std::lock_guard<std::mutex> lock(g_rings.respMutex);
           g_rings.respRing.push_back({ts, ids.origRemote, ids.qname, ids.qtype, std::numeric_limits<unsigned int>::max(), 0, fake, dss->remote});
-          g_stats.downstreamTimeouts++; // this is an 'actively' discovered timeout
-          // we keep track of 'reuseds' seperately
-
-          ids.origFD = -1; // don't touch 'ids' beyond this point!
         }          
       }
     }