From: Remi Gacogne Date: Thu, 10 Feb 2022 14:27:34 +0000 (+0100) Subject: dnsdist: Use a locked map to store the UDP states when randomizing the IDs X-Git-Tag: rec-4.7.0-alpha1~11^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7686bce9caccd6730b511dd803dbbeacfe2f2af1;p=thirdparty%2Fpdns.git dnsdist: Use a locked map to store the UDP states when randomizing the IDs --- diff --git a/pdns/dnsdist-lua.cc b/pdns/dnsdist-lua.cc index 8b7bceb1b3..24cfe202b0 100644 --- a/pdns/dnsdist-lua.cc +++ b/pdns/dnsdist-lua.cc @@ -1279,7 +1279,7 @@ static void setupLuaConfig(LuaContext& luaCtx, bool client, bool configCheck) luaCtx.writeFunction("setTCPSendTimeout", [](int timeout) { g_tcpSendTimeout = timeout; }); - luaCtx.writeFunction("setUDPTimeout", [](int timeout) { g_udpTimeout = timeout; }); + luaCtx.writeFunction("setUDPTimeout", [](int timeout) { DownstreamState::s_udpTimeout = timeout; }); luaCtx.writeFunction("setMaxUDPOutstanding", [](uint64_t max) { if (!g_configurationDone) { diff --git a/pdns/dnsdist.cc b/pdns/dnsdist.cc index 9695d1619d..d3c6e7d397 100644 --- a/pdns/dnsdist.cc +++ b/pdns/dnsdist.cc @@ -137,7 +137,6 @@ GlobalStateHolder g_dstates; GlobalStateHolder> g_dynblockNMG; GlobalStateHolder> g_dynblockSMT; DNSAction::Action g_dynBlockAction = DNSAction::Action::Drop; -int g_udpTimeout{2}; bool g_servFailOnNoPolicy{false}; bool g_truncateTC{false}; @@ -597,11 +596,11 @@ void responderThread(std::shared_ptr dss) dnsheader* dh = reinterpret_cast(response.data()); queryId = dh->id; - if (queryId >= dss->idStates.size()) { + IDState* ids = dss->getExistingState(queryId); + if (ids == nullptr) { continue; } - IDState* ids = &dss->idStates[queryId]; int64_t usageIndicator = ids->usageIndicator; if (!IDState::isInUse(usageIndicator)) { @@ -684,6 +683,7 @@ void responderThread(std::shared_ptr dss) vinfolog("Got answer from %s, relayed to %s, took %f usec", dss->remote.toStringWithPort(), ids->origRemote.toStringWithPort(), udiff); handleResponseSent(*ids, udiff, *dr.remote, dss->remote, static_cast(got), cleartextDH, dss->getProtocol()); + dss->releaseState(queryId); dss->latencyUsec = (127.0 * dss->latencyUsec / 128.0) + udiff/128.0; @@ -1846,41 +1846,7 @@ static void healthChecksThread() dss->prev.queries.store(dss->queries.load()); dss->prev.reuseds.store(dss->reuseds.load()); - for (IDState& ids : dss->idStates) { // timeouts - int64_t usageIndicator = ids.usageIndicator; - if(IDState::isInUse(usageIndicator) && ids.age++ > g_udpTimeout) { - /* We mark the state as unused as soon as possible - to limit the risk of racing with the - responder thread. - */ - auto oldDU = ids.du; - - if (!ids.tryMarkUnused(usageIndicator)) { - /* this state has been altered in the meantime, - don't go anywhere near it */ - continue; - } - ids.du = nullptr; - handleDOHTimeout(DOHUnitUniquePtr(oldDU, DOHUnit::release)); - oldDU = nullptr; - ids.age = 0; - dss->reuseds++; - --dss->outstanding; - ++g_stats.downstreamTimeouts; // this is an 'actively' discovered timeout - vinfolog("Had a downstream timeout from %s (%s) for query for %s|%s from %s", - dss->remote.toStringWithPort(), dss->getName(), - ids.qname.toLogString(), QType(ids.qtype).toString(), ids.origRemote.toStringWithPort()); - - struct timespec ts; - gettime(&ts); - - struct dnsheader fake; - memset(&fake, 0, sizeof(fake)); - fake.id = ids.origID; - - g_rings.insertResponse(ts, ids.origRemote, ids.qname, ids.qtype, std::numeric_limits::max(), 0, fake, dss->remote, dss->getProtocol()); - } - } + dss->handleTimeouts(); } handleQueuedHealthChecks(*mplexer); diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index 6205895df2..4ee2baf5eb 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -738,10 +738,11 @@ struct DownstreamState private: std::string name; std::string nameWithAddr; + LockGuarded> d_idStatesMap; + vector idStates; public: std::shared_ptr d_tlsCtx{nullptr}; std::vector sockets; - vector idStates; set pools; std::mutex connectLock; std::thread tid; @@ -882,7 +883,10 @@ public: bool passCrossProtocolQuery(std::unique_ptr&& cpq); int pickSocketForSending(); void pickSocketsReadyForReceiving(std::vector& ready); + void handleTimeouts(); IDState* getIDState(unsigned int& id, int64_t& generation); + IDState* getExistingState(unsigned int id); + void releaseState(unsigned int id); dnsdist::Protocol getProtocol() const { @@ -898,8 +902,11 @@ public: return dnsdist::Protocol::DoUDP; } + static int s_udpTimeout; static bool s_randomizeSockets; static bool s_randomizeIDs; +private: + void handleTimeout(IDState& ids); }; using servers_t =vector>; @@ -998,7 +1005,6 @@ extern bool g_truncateTC; extern bool g_fixupCase; extern int g_tcpRecvTimeout; extern int g_tcpSendTimeout; -extern int g_udpTimeout; extern uint16_t g_maxOutstanding; extern std::atomic g_configurationDone; extern boost::optional g_maxTCPClientThreads; diff --git a/pdns/dnsdistdist/dnsdist-backend.cc b/pdns/dnsdistdist/dnsdist-backend.cc index e005595aaa..88631b1d63 100644 --- a/pdns/dnsdistdist/dnsdist-backend.cc +++ b/pdns/dnsdistdist/dnsdist-backend.cc @@ -23,6 +23,7 @@ #include "dnsdist.hh" #include "dnsdist-nghttp2.hh" #include "dnsdist-random.hh" +#include "dnsdist-rings.hh" #include "dnsdist-tcp.hh" #include "dolog.hh" @@ -175,7 +176,12 @@ DownstreamState::DownstreamState(const ComboAddress& remote_, const ComboAddress void DownstreamState::connectUDPSockets(size_t numberOfSockets) { - idStates.resize(g_maxOutstanding); + if (s_randomizeIDs) { + idStates.clear(); + } + else { + idStates.resize(g_maxOutstanding); + } sockets.resize(numberOfSockets); if (sockets.size() > 1) { @@ -245,6 +251,105 @@ void DownstreamState::pickSocketsReadyForReceiving(std::vector& ready) bool DownstreamState::s_randomizeSockets{false}; bool DownstreamState::s_randomizeIDs{false}; +int DownstreamState::s_udpTimeout{2}; + +static bool isIDSExpired(IDState& ids) +{ + auto age = ids.age++; + return age > DownstreamState::s_udpTimeout; +} + +void DownstreamState::handleTimeout(IDState& ids) +{ + /* We mark the state as unused as soon as possible + to limit the risk of racing with the + responder thread. + */ + auto oldDU = ids.du; + + ids.du = nullptr; + handleDOHTimeout(DOHUnitUniquePtr(oldDU, DOHUnit::release)); + oldDU = nullptr; + ids.age = 0; + reuseds++; + --outstanding; + ++g_stats.downstreamTimeouts; // this is an 'actively' discovered timeout + vinfolog("Had a downstream timeout from %s (%s) for query for %s|%s from %s", + remote.toStringWithPort(), getName(), + ids.qname.toLogString(), QType(ids.qtype).toString(), ids.origRemote.toStringWithPort()); + + struct timespec ts; + gettime(&ts); + + struct dnsheader fake; + memset(&fake, 0, sizeof(fake)); + fake.id = ids.origID; + + g_rings.insertResponse(ts, ids.origRemote, ids.qname, ids.qtype, std::numeric_limits::max(), 0, fake, remote, getProtocol()); +} + +void DownstreamState::handleTimeouts() +{ + if (s_randomizeIDs) { + auto map = d_idStatesMap.lock(); + for (auto it = map->begin(); it != map->end(); ) { + auto& ids = it->second; + if (isIDSExpired(ids)) { + handleTimeout(ids); + it = map->erase(it); + continue; + } + ++it; + } + } + else { + for (IDState& ids : idStates) { + int64_t usageIndicator = ids.usageIndicator; + if (IDState::isInUse(usageIndicator) && isIDSExpired(ids)) { + if (!ids.tryMarkUnused(usageIndicator)) { + /* this state has been altered in the meantime, + don't go anywhere near it */ + continue; + } + + handleTimeout(ids); + } + } + } +} + +IDState* DownstreamState::getExistingState(unsigned int stateId) +{ + if (s_randomizeIDs) { + auto map = d_idStatesMap.lock(); + auto it = map->find(stateId); + if (it == map->end()) { + return nullptr; + } + return &it->second; + } + else { + if (stateId >= idStates.size()) { + return nullptr; + } + return &idStates[stateId]; + } +} + +void DownstreamState::releaseState(unsigned int stateId) +{ + if (s_randomizeIDs) { + auto map = d_idStatesMap.lock(); + auto it = map->find(stateId); + if (it == map->end()) { + return; + } + if (it->second.isInUse()) { + return; + } + map->erase(it); + } +} IDState* DownstreamState::getIDState(unsigned int& selectedID, int64_t& generation) { @@ -255,12 +360,21 @@ IDState* DownstreamState::getIDState(unsigned int& selectedID, int64_t& generati up to 5 five times. The last selected one is used even if it was already in use */ size_t remainingAttempts = 5; + auto map = d_idStatesMap.lock(); + + bool done = false; do { - selectedID = dnsdist::getRandomValue(idStates.size()); - ids = &idStates[selectedID]; - remainingAttempts--; + selectedID = dnsdist::getRandomValue(std::numeric_limits::max()); + auto [it, inserted] = map->insert({selectedID, IDState()}); + ids = &it->second; + if (inserted) { + done = true; + } + else { + remainingAttempts--; + } } - while (ids->isInUse() && remainingAttempts > 0); + while (!done && remainingAttempts > 0); } else { selectedID = (idOffset++) % idStates.size(); diff --git a/pdns/dnsdistdist/docs/reference/tuning.rst b/pdns/dnsdistdist/docs/reference/tuning.rst index c8d058fe03..9e2f517d22 100644 --- a/pdns/dnsdistdist/docs/reference/tuning.rst +++ b/pdns/dnsdistdist/docs/reference/tuning.rst @@ -128,9 +128,8 @@ Tuning related functions .. versionadded:: 1.8.0 - Setting this parameter to true (default is false) will randomize the IDs in outgoing UDP queries, at a small performance cost. :func:`setMaxUDPOutstanding` - should be set at its highest possible value (default since 1.4.0) to make that setting fully efficient. This is only useful if the path between dnsdist - and the backend is not trusted and the 'TCP-only', DNS over TLS or DNS over HTTPS transports cannot be used. + Setting this parameter to true (default is false) will randomize the IDs in outgoing UDP queries, at a small performance cost, ignoring the :func:`setMaxUDPOutstanding` + value. This is only useful if the path between dnsdist and the backend is not trusted and the 'TCP-only', DNS over TLS or DNS over HTTPS transports cannot be used. See also :func:`setRandomizedOutgoingSockets`. .. function:: setRandomizedOutgoingSockets(val):