From: Otto Moerbeek Date: Tue, 21 Nov 2023 15:33:21 +0000 (+0100) Subject: Compute the auth response delay we are willing to accept based on the number of X-Git-Tag: rec-5.1.0-beta1~22^2~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=862859394ec38437fc1d571c5a4fdce8e3a139dd;p=thirdparty%2Fpdns.git Compute the auth response delay we are willing to accept based on the number of mthread slots available. --- diff --git a/pdns/recursordist/pdns_recursor.cc b/pdns/recursordist/pdns_recursor.cc index b360e94dd7..7713869ce6 100644 --- a/pdns/recursordist/pdns_recursor.cc +++ b/pdns/recursordist/pdns_recursor.cc @@ -251,7 +251,7 @@ PacketBuffer GenUDPQueryResponse(const ComboAddress& dest, const string& query) t_fdm->addReadFD(socket.getHandle(), handleGenUDPQueryResponse, pident); PacketBuffer data; - int ret = g_multiTasker->waitEvent(pident, &data, g_networkTimeoutMsec); + int ret = g_multiTasker->waitEvent(pident, &data, authWaitTime(g_multiTasker)); if (ret == 0 || ret == -1) { // timeout t_fdm->removeReadFD(socket.getHandle()); @@ -267,6 +267,21 @@ static void handleUDPServerResponse(int fileDesc, FDMultiplexer::funcparam_t& va thread_local std::unique_ptr t_udpclientsocks; +// If we have plenty of mthread slots left, use default timeout. 
+// Otherwise reduce the timeout to be between g_networkTimeoutMsec/10 and g_networkTimeoutMsec +unsigned int authWaitTime(const std::unique_ptr& mtasker) +{ + const auto max = g_maxMThreads; + const auto current = mtasker->numProcesses(); + const unsigned int cutoff = max / 10; /// if we have less than 10% used, do not reduce auth timeout + if (current < cutoff) { + return g_networkTimeoutMsec; + } + // current is between cutoff and max + const auto avail = max - current; + return std::max(g_networkTimeoutMsec / 10, g_networkTimeoutMsec * avail / (max - cutoff)); +} + /* these two functions are used by LWRes */ LWResult::Result asendto(const void* data, size_t len, int /* flags */, const ComboAddress& toAddress, uint16_t qid, const DNSName& domain, uint16_t qtype, bool ecs, int* fileDesc) @@ -291,11 +306,11 @@ LWResult::Result asendto(const void* data, size_t len, int /* flags */, assert(chain.first->key->domain == pident->domain); // NOLINT // don't chain onto existing chained waiter or a chain already processed if (chain.first->key->fd > -1 && !chain.first->key->closed) { + *fileDesc = -1; // gets used in waitEvent / sendEvent later on if (g_maxChainLength > 0 && chain.first->key->authReqChain.size() >= g_maxChainLength) { return LWResult::Result::OSLimitError; } chain.first->key->authReqChain.insert(qid); // we can chain - *fileDesc = -1; // gets used in waitEvent / sendEvent later on auto maxLength = t_Counters.at(rec::Counter::maxChainLength); if (chain.first->key->authReqChain.size() > maxLength) { t_Counters.at(rec::Counter::maxChainLength) = chain.first->key->authReqChain.size(); @@ -339,7 +354,7 @@ LWResult::Result arecvfrom(PacketBuffer& packet, int /* flags */, const ComboAdd pident->type = qtype; pident->remote = fromAddr; - int ret = g_multiTasker->waitEvent(pident, &packet, g_networkTimeoutMsec, &now); + int ret = g_multiTasker->waitEvent(pident, &packet, authWaitTime(g_multiTasker), &now); len = 0; /* -1 means error, 0 means timeout, 1 means a 
result from handleUDPServerResponse() which might still be an error */ @@ -2363,7 +2378,7 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr variable = true; } - if (g_multiTasker->numProcesses() > g_maxMThreads) { + if (g_multiTasker->numProcesses() >= g_maxMThreads) { if (!g_quiet) { SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << g_multiTasker->getTid() << "/" << g_multiTasker->numProcesses() << "] DROPPED question from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << ", over capacity" << endl, g_slogudpin->info(Logr::Notice, "Dropped question, over capacity", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr))); diff --git a/pdns/recursordist/rec-main.hh b/pdns/recursordist/rec-main.hh index 701cf82d97..7bec94eb39 100644 --- a/pdns/recursordist/rec-main.hh +++ b/pdns/recursordist/rec-main.hh @@ -624,6 +624,7 @@ string doTraceRegex(FDWrapper file, vector::const_iterator begin, vector extern bool g_luaSettingsInYAML; void startLuaConfigDelayedThreads(const vector& rpzs, uint64_t generation); void activateLuaConfig(LuaConfigItems& lci); +unsigned int authWaitTime(const std::unique_ptr& mtasker); #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10" #define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10" diff --git a/pdns/recursordist/rec-tcp.cc b/pdns/recursordist/rec-tcp.cc index c68d245b49..d391df5322 100644 --- a/pdns/recursordist/rec-tcp.cc +++ b/pdns/recursordist/rec-tcp.cc @@ -681,7 +681,7 @@ void handleNewTCPQuestion(int fileDesc, [[maybe_unused]] FDMultiplexer::funcpara socklen_t addrlen = sizeof(addr); int newsock = accept(fileDesc, reinterpret_cast(&addr), &addrlen); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) if (newsock >= 0) { - if 
(g_multiTasker->numProcesses() > g_maxMThreads) { + if (g_multiTasker->numProcesses() >= g_maxMThreads) { t_Counters.at(rec::Counter::overCapacityDrops)++; try { closesocket(newsock); @@ -1060,7 +1060,7 @@ LWResult::Result arecvtcp(PacketBuffer& data, const size_t len, shared_ptrlowState TCPIOHandlerStateChange(IOState::Done, state, pident); - int ret = g_multiTasker->waitEvent(pident, &data, g_networkTimeoutMsec); + int ret = g_multiTasker->waitEvent(pident, &data, authWaitTime(g_multiTasker)); TCPLOG(pident->tcpsock, "arecvtcp " << ret << ' ' << data.size() << ' '); if (ret == 0) { TCPLOG(pident->tcpsock, "timeout" << endl); diff --git a/pdns/recursordist/syncres.cc b/pdns/recursordist/syncres.cc index 268a55d850..01ee4cdc4e 100644 --- a/pdns/recursordist/syncres.cc +++ b/pdns/recursordist/syncres.cc @@ -5400,6 +5400,7 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname, // don't account for resource limits, they are our own fault // And don't throttle when the IP address is on the dontThrottleNetmasks list or the name is part of dontThrottleNames if (resolveret != LWResult::Result::OSLimitError && !chained && !dontThrottle) { + cerr << "THROTTLING !!!!" << remoteIP.toString() << ' ' << int(resolveret) << endl; s_nsSpeeds.lock()->find_or_enter(nsName.empty() ? DNSName(remoteIP.toStringWithPort()) : nsName, d_now).submit(remoteIP, 1000000, d_now); // 1 sec // make sure we don't throttle the root