From 9ddb314d02ab1072567802152ea68588615e623a Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Thu, 5 Jan 2023 12:10:46 +0100 Subject: [PATCH] rec: Implement a small cache of MTasker stack objects --- pdns/mtasker.cc | 44 ++++++++++++++++++++------ pdns/mtasker.hh | 35 ++++++++++++-------- pdns/recursordist/docs/performance.rst | 11 +++++++ pdns/recursordist/docs/settings.rst | 14 +++++++- pdns/recursordist/mtasker_context.hh | 2 +- pdns/recursordist/rec-main.cc | 3 +- 6 files changed, 82 insertions(+), 27 deletions(-) diff --git a/pdns/mtasker.cc b/pdns/mtasker.cc index c02e27781f..82d1bbe875 100644 --- a/pdns/mtasker.cc +++ b/pdns/mtasker.cc @@ -257,6 +257,27 @@ templateint MTasker std::shared_ptr MTasker::getUContext() +{ + auto uc = std::make_shared(); + if (d_cachedStacks.empty()) { + uc->uc_stack.resize(d_stacksize + 1); + } + else { + uc->uc_stack = std::move(d_cachedStacks.top()); + d_cachedStacks.pop(); + } + + uc->uc_link = &d_kernel; // come back to kernel after dying + +#ifdef PDNS_USE_VALGRIND + uc->valgrind_id = VALGRIND_STACK_REGISTER(&uc->uc_stack[0], + &uc->uc_stack[uc->uc_stack.size()-1]); +#endif /* PDNS_USE_VALGRIND */ + + return uc; +} + //! launches a new thread /** The kernel can call this to make a new thread, which starts at the function start and gets passed the val void pointer. 
\param start Pointer to the function which will form the start of the thread @@ -264,14 +285,7 @@ templateint MTaskervoid MTasker::makeThread(tfunc_t *start, void* val) { - auto uc=std::make_shared(); - - uc->uc_link = &d_kernel; // come back to kernel after dying - uc->uc_stack.resize (d_stacksize+1); -#ifdef PDNS_USE_VALGRIND - uc->valgrind_id = VALGRIND_STACK_REGISTER(&uc->uc_stack[0], - &uc->uc_stack[uc->uc_stack.size()-1]); -#endif /* PDNS_USE_VALGRIND */ + auto uc = getUContext(); ++d_threadsCount; auto& thread = d_threads[d_maxtid]; @@ -317,8 +331,18 @@ templatebool MTasker::schedule(con d_runQueue.pop(); return true; } - if(!d_zombiesQueue.empty()) { - d_threads.erase(d_zombiesQueue.front()); + if (!d_zombiesQueue.empty()) { + auto zombi = d_zombiesQueue.front(); + if (d_cachedStacks.size() < d_maxCachedStacks) { + auto thread = d_threads.find(zombi); + if (thread != d_threads.end()) { + d_cachedStacks.push(std::move(thread->second.context->uc_stack)); /* keep the dead mthread's stack for reuse by getUContext() */ + d_threads.erase(thread); /* erase only a valid iterator: erase(end()) is undefined behaviour */ + } + } + else { + d_threads.erase(zombi); + } --d_threadsCount; d_zombiesQueue.pop(); return true; diff --git a/pdns/mtasker.hh b/pdns/mtasker.hh index 0097a1e8ad..0733706b9f 100644 --- a/pdns/mtasker.hh +++ b/pdns/mtasker.hh @@ -20,25 +20,25 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #pragma once -#include +#include +#include #include -#include #include -#include +#include +#include +#include + #include #include #include #include "namespaces.hh" #include "misc.hh" #include "mtasker_context.hh" -#include using namespace ::boost::multi_index; // #define MTASKERTIMING 1 -struct KeyTag {}; - //! The main MTasker class /** The main MTasker class. See the main page for more information. \tparam EventKey Type of the key with which events are to be identified. Defaults to int. 
@@ -57,23 +57,27 @@ private: { std::shared_ptr context; std::function start; - char* startOfStack; - char* highestStackSeen; + const char* startOfStack; + const char* highestStackSeen; #ifdef MTASKERTIMING CPUTime dt; unsigned int totTime; #endif }; - typedef std::map mthreads_t; + using pdns_mtasker_stack_t = std::vector>; + using mthreads_t = std::map; + mthreads_t d_threads; + std::stack d_cachedStacks; size_t d_stacksize; - size_t d_threadsCount; - int d_tid; - int d_maxtid; + size_t d_threadsCount{0}; + size_t d_maxCachedStacks{0}; + int d_tid{0}; + int d_maxtid{0}; EventVal d_waitval; - enum waitstatusenum {Error=-1,TimeOut=0,Answer} d_waitstatus; + enum waitstatusenum : int8_t {Error=-1,TimeOut=0,Answer} d_waitstatus; public: struct Waiter @@ -83,6 +87,7 @@ public: struct timeval ttd; int tid; }; + struct KeyTag {}; typedef multi_index_container< Waiter, @@ -119,7 +124,7 @@ public: This limit applies solely to the stack, the heap is not limited in any way. If threads need to allocate a lot of data, the use of new/delete is suggested. */ - MTasker(size_t stacksize=16*8192) : d_stacksize(stacksize), d_threadsCount(0), d_tid(0), d_maxtid(0), d_waitstatus(Error) + MTasker(size_t stacksize=16*8192, size_t stackCacheSize=0) : d_stacksize(stacksize), d_maxCachedStacks(stackCacheSize), d_waitstatus(Error) { initMainStackBounds(); @@ -141,6 +146,8 @@ public: unsigned int getUsec(); private: + std::shared_ptr getUContext(); + EventKey d_eventkey; // for waitEvent, contains exact key it was awoken for }; #include "mtasker.cc" diff --git a/pdns/recursordist/docs/performance.rst b/pdns/recursordist/docs/performance.rst index 12f00025aa..6e5d246161 100644 --- a/pdns/recursordist/docs/performance.rst +++ b/pdns/recursordist/docs/performance.rst @@ -37,6 +37,17 @@ If ``SO_REUSEPORT`` support is available and :ref:`setting-reuseport` is set to .. versionadded:: 4.2.0 The :ref:`setting-distributor-threads` parameter can be used to run more than one distributor thread. 
+MTasker and MThreads +-------------------- + +PowerDNS Recursor uses a cooperative userspace multitasking system called ``MTasker``, based either on ``boost::context`` if available, or on ``System V ucontexts`` otherwise. For maximum performance, please make sure that your system supports ``boost::context``, as the alternative is known to be considerably slower. + +The maximum number of simultaneous MTasker threads, called ``MThreads``, can be tuned via :ref:`setting-max-mthreads`, as the default value of 2048 might not be enough for large-scale installations. + +When an ``MThread`` is started, a new stack is dynamically allocated for it on the heap. The size of that stack can be configured via the :ref:`setting-stack-size` parameter, whose default value is 200 kB, which should be enough in most cases. + +To reduce the cost of allocating a new stack for every query, the recursor can cache a small number of stacks to make sure that the allocation stays cheap. This can be configured via the :ref:`setting-stack-cache-size` setting. The only trade-off of enabling this cache is a slightly increased memory consumption, at worst equal to the number of stacks specified by :ref:`setting-stack-cache-size` multiplied by the size of one stack, itself specified via :ref:`setting-stack-size`. + Performance tips ---------------- diff --git a/pdns/recursordist/docs/settings.rst b/pdns/recursordist/docs/settings.rst index aeace308a0..31f0d43adf 100644 --- a/pdns/recursordist/docs/settings.rst +++ b/pdns/recursordist/docs/settings.rst @@ -1971,6 +1971,18 @@ Owner and group can be specified by name, mode is in octal. If set to non-zero, PowerDNS will assume it is being spoofed after seeing this many answers with the wrong id. +.. _setting-stack-cache-size: + +``stack-cache-size`` +-------------------- +.. versionadded:: 4.9.0 + +- Integer +- Default: 100 + +Maximum number of mthread stacks that can be cached for later reuse, per thread. 
Caching these stacks reduces the CPU load at the cost of a slightly higher memory usage, each cached stack consuming `stack-size` bytes of memory. +It makes no sense to cache more stacks than the value of `max-mthreads`, since there will never be more stacks than that in use at a given time. + .. _setting-stack-size: ``stack-size`` @@ -1978,7 +1990,7 @@ If set to non-zero, PowerDNS will assume it is being spoofed after seeing this m - Integer - Default: 200000 -Size of the stack of each mthread. +Size in bytes of the stack of each mthread. .. _setting-statistics-interval: diff --git a/pdns/recursordist/mtasker_context.hh b/pdns/recursordist/mtasker_context.hh index e03a1709e4..fe8c1757ca 100644 --- a/pdns/recursordist/mtasker_context.hh +++ b/pdns/recursordist/mtasker_context.hh @@ -54,7 +54,7 @@ extern __thread void* t_mainStack; extern __thread size_t t_mainStackSize; #endif /* HAVE_FIBER_SANITIZER */ -static inline void notifyStackSwitch(void* startOfStack, size_t stackSize) +static inline void notifyStackSwitch(const void* startOfStack, size_t stackSize) { #ifdef HAVE_FIBER_SANITIZER __sanitizer_start_switch_fiber(nullptr, startOfStack, stackSize); diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc index b30f7fa262..ff87da5096 100644 --- a/pdns/recursordist/rec-main.cc +++ b/pdns/recursordist/rec-main.cc @@ -2408,7 +2408,7 @@ static void recursorThread() t_bogusqueryring = std::make_unique>>(); t_bogusqueryring->set_capacity(ringsize); } - MT = std::make_unique(::arg().asNum("stack-size")); + MT = std::make_unique(::arg().asNum("stack-size"), ::arg().asNum("stack-cache-size")); threadInfo.mt = MT.get(); /* start protobuf export threads if needed */ @@ -2601,6 +2601,7 @@ int main(int argc, char** argv) #else ::arg().set("stack-size", "stack size per mthread") = "200000"; #endif + ::arg().set("stack-cache-size", "Size of the stack cache, per thread") = "100"; // This mode forces metrics snap updates and disable root-refresh, to get 
consistent counters ::arg().setSwitch("devonly-regression-test-mode", "internal use only") = "no"; ::arg().set("soa-minimum-ttl", "Don't change") = "0"; -- 2.47.2