From: Remi Gacogne Date: Fri, 14 Nov 2025 12:05:45 +0000 (+0100) Subject: dnsdist: Better performance when using `recvmmsg` X-Git-Tag: rec-5.4.0-alpha1~69^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c9e3cb9e619b16e50021116272e783bac533cc12;p=thirdparty%2Fpdns.git dnsdist: Better performance when using `recvmmsg` `recvmmsg` and `sendmmsg` are Linux-specific syscalls to receive and send multiple messages on a socket via a single call, which in theory improves performance a lot, especially now that the mitigations for speculative execution vulnerabilities make syscalls expensive. We had several reports that turning this feature on in DNSdist did not really improve performance, which was quite unexpected. A CPU profile taken with `perf` showed that we were spending an awful lot of time resizing buffers that, most of the time, did not need to be resized because they had not been used. This commit ensures that we only resize and reset the buffers for slots that were used in the previous round. With that, I'm getting a nice performance boost from using `recvmmsg` and `sendmmsg` while processing 80k QPS at a 100% cache-hit ratio from a single thread: the CPU usage shrinks by roughly 15%. 
Signed-off-by: Remi Gacogne --- diff --git a/pdns/dnsdistdist/dnsdist.cc b/pdns/dnsdistdist/dnsdist.cc index 787bd72d9e..f49fde8e4b 100644 --- a/pdns/dnsdistdist/dnsdist.cc +++ b/pdns/dnsdistdist/dnsdist.cc @@ -2235,23 +2235,32 @@ static void MultipleMessagesUDPClientThread(ClientState* clientState) fillMSGHdr(&msgVec[idx].msg_hdr, &recvData[idx].iov, &recvData[idx].cbuf, sizeof(recvData[idx].cbuf), reinterpret_cast(recvData[idx].packet.data()), maxIncomingPacketSize, &recvData[idx].remote); } + int msgsGot = vectSize; /* go now */ for (;;) { /* reset the IO vector, since it's also used to send the vector of responses - to avoid having to copy the data around */ - for (size_t idx = 0; idx < vectSize; idx++) { - recvData[idx].packet.resize(initialBufferSize); - recvData[idx].iov.iov_base = &recvData[idx].packet.at(0); - recvData[idx].iov.iov_len = recvData[idx].packet.size(); + to avoid having to copy the data around + No need to reset the parts that have not been used, though. */ + for (size_t idx = 0; idx < static_cast(msgsGot); idx++) { + auto& slot = recvData[idx]; + /* only resize if the buffer is actually smaller than expected */ + if (slot.packet.size() < initialBufferSize) { + slot.packet.resize(initialBufferSize); + } + /* but we need to set the IOv pointer and size + anyway, because if we resized it the pointer might + now be invalid */ + slot.iov.iov_base = &slot.packet.at(0); + slot.iov.iov_len = slot.packet.size(); } /* block until we have at least one message ready, but return as many as possible to save the syscall costs */ - int msgsGot = recvmmsg(clientState->udpFD, msgVec.data(), vectSize, MSG_WAITFORONE | MSG_TRUNC, nullptr); - + msgsGot = recvmmsg(clientState->udpFD, msgVec.data(), vectSize, MSG_WAITFORONE | MSG_TRUNC, nullptr); if (msgsGot <= 0) { vinfolog("Getting UDP messages via recvmmsg() failed with: %s", stringerror()); + msgsGot = 0; continue; } @@ -2259,8 +2268,9 @@ static void MultipleMessagesUDPClientThread(ClientState* clientState) /* 
process the received messages */ for (int msgIdx = 0; msgIdx < msgsGot; msgIdx++) { - const struct msghdr* msgh = &msgVec[msgIdx].msg_hdr; - unsigned int got = msgVec[msgIdx].msg_len; + auto& msg = msgVec[msgIdx]; + const struct msghdr* msgh = &msg.msg_hdr; + unsigned int got = msg.msg_len; const ComboAddress& remote = recvData[msgIdx].remote; if (static_cast(got) < sizeof(struct dnsheader)) { @@ -2269,9 +2279,10 @@ static void MultipleMessagesUDPClientThread(ClientState* clientState) continue; } - recvData[msgIdx].packet.resize(got); + auto& data = recvData[msgIdx]; + data.packet.resize(got); dnsdist::configuration::refreshLocalRuntimeConfiguration(); - processUDPQuery(*clientState, msgh, remote, recvData[msgIdx].dest, recvData[msgIdx].packet, &outMsgVec, &msgsToSend, &recvData[msgIdx].iov, &recvData[msgIdx].cbuf); + processUDPQuery(*clientState, msgh, remote, data.dest, data.packet, &outMsgVec, &msgsToSend, &data.iov, &data.cbuf); } /* immediate (not delayed or sent to a backend) responses (mostly from a rule, dynamic block