rec: Implement a small cache of MTasker stack objects

author Remi Gacogne <remi.gacogne@powerdns.com>

Thu, 5 Jan 2023 11:10:46 +0000 (12:10 +0100)

committer Remi Gacogne <remi.gacogne@powerdns.com>

Thu, 5 Jan 2023 11:28:14 +0000 (12:28 +0100)
author Remi Gacogne <remi.gacogne@powerdns.com>
Thu, 5 Jan 2023 11:10:46 +0000 (12:10 +0100)
committer Remi Gacogne <remi.gacogne@powerdns.com>
Thu, 5 Jan 2023 11:28:14 +0000 (12:28 +0100)
diff --git a/pdns/mtasker.cc b/pdns/mtasker.cc

index c02e27781fe0f7ca619046c09e2e5e20147a3778..82d1bbe87547b39652a8bb7b8accc19723a4fdb8 100644 (file)
--- a/pdns/mtasker.cc
+++ b/pdns/mtasker.cc
@@ -257,6 +257,27 @@ template<class EventKey, class EventVal, class Cmp>int MTasker<EventKey,EventVal
    return 1;
  }
  
+template<class Key, class Val, class Cmp> std::shared_ptr<pdns_ucontext_t> MTasker<Key,Val,Cmp>::getUContext()
+{ // Builds the context for a new mthread, reusing a cached stack when one is available
+  auto uc = std::make_shared<pdns_ucontext_t>();
+  if (d_cachedStacks.empty()) {
+    uc->uc_stack.resize(d_stacksize + 1); // nothing cached: size a fresh stack from d_stacksize
+  }
+  else {
+    uc->uc_stack = std::move(d_cachedStacks.top()); // move the cached stack out before popping its slot
+    d_cachedStacks.pop();
+  }
+
+  uc->uc_link = &d_kernel; // come back to the kernel context after the mthread dies
+  // Only when built with PDNS_USE_VALGRIND: register the stack's first and last bytes, keep the returned id
+#ifdef PDNS_USE_VALGRIND
+  uc->valgrind_id = VALGRIND_STACK_REGISTER(&uc->uc_stack[0],
+                                            &uc->uc_stack[uc->uc_stack.size()-1]);
+#endif /* PDNS_USE_VALGRIND */
+
+  return uc;
+}
+
  //! launches a new thread
  /** The kernel can call this to make a new thread, which starts at the function start and gets passed the val void pointer.
      \param start Pointer to the function which will form the start of the thread
@@ -264,14 +285,7 @@ template<class EventKey, class EventVal, class Cmp>int MTasker<EventKey,EventVal
  */
  template<class Key, class Val, class Cmp>void MTasker<Key,Val,Cmp>::makeThread(tfunc_t *start, void* val)
  {
-  auto uc=std::make_shared<pdns_ucontext_t>();
-  
-  uc->uc_link = &d_kernel; // come back to kernel after dying
-  uc->uc_stack.resize (d_stacksize+1);
-#ifdef PDNS_USE_VALGRIND
-  uc->valgrind_id = VALGRIND_STACK_REGISTER(&uc->uc_stack[0],
-                                            &uc->uc_stack[uc->uc_stack.size()-1]);
-#endif /* PDNS_USE_VALGRIND */
+  auto uc = getUContext();
  
    ++d_threadsCount;
    auto& thread = d_threads[d_maxtid];
@@ -317,8 +331,18 @@ template<class Key, class Val, class Cmp>bool MTasker<Key,Val,Cmp>::schedule(con
      d_runQueue.pop();
      return true;
    }
-  if(!d_zombiesQueue.empty()) {
-    d_threads.erase(d_zombiesQueue.front());
+  if (!d_zombiesQueue.empty()) {
+    auto zombi = d_zombiesQueue.front();
+    if (d_cachedStacks.size() < d_maxCachedStacks) {
+      auto thread = d_threads.find(zombi);
+      if (thread != d_threads.end()) {
+        d_cachedStacks.push(std::move(thread->second.context->uc_stack));
+      }
+      d_threads.erase(thread);
+    }
+    else {
+      d_threads.erase(zombi);
+    }
      --d_threadsCount;
      d_zombiesQueue.pop();
      return true;
diff --git a/pdns/mtasker.hh b/pdns/mtasker.hh

index 0097a1e8ad33350deb4e516764e27b602809b7f0..0733706b9f44aa24430759d40a38d228120b07d8 100644 (file)
--- a/pdns/mtasker.hh
+++ b/pdns/mtasker.hh
@@ -20,25 +20,25 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
   */
  #pragma once
-#include <stdint.h>
+#include <cstdint>
+#include <ctime>
  #include <queue>
-#include <vector>
  #include <map>
-#include <time.h>
+#include <memory>
+#include <stack>
+#include <vector>
+
  #include <boost/multi_index_container.hpp>
  #include <boost/multi_index/ordered_index.hpp>
  #include <boost/multi_index/key_extractors.hpp>
  #include "namespaces.hh"
  #include "misc.hh"
  #include "mtasker_context.hh"
-#include <memory>
  
  using namespace ::boost::multi_index;
  
  // #define MTASKERTIMING 1
  
-struct KeyTag {};
-
  //! The main MTasker class    
  /** The main MTasker class. See the main page for more information.
      \tparam EventKey Type of the key with which events are to be identified. Defaults to int.
@@ -57,23 +57,27 @@ private:
    {
         std::shared_ptr<pdns_ucontext_t> context;
         std::function<void(void)> start;
-       char* startOfStack;
-       char* highestStackSeen;
+       const char* startOfStack;
+       const char* highestStackSeen;
  #ifdef MTASKERTIMING
         CPUTime dt;
         unsigned int totTime;
  #endif
    };
  
-  typedef std::map<int, ThreadInfo> mthreads_t;
+  using pdns_mtasker_stack_t = std::vector<char, lazy_allocator<char>>;
+  using mthreads_t = std::map<int, ThreadInfo>;
+
    mthreads_t d_threads;
+  std::stack<pdns_mtasker_stack_t> d_cachedStacks;
    size_t d_stacksize;
-  size_t d_threadsCount;
-  int d_tid;
-  int d_maxtid;
+  size_t d_threadsCount{0};
+  size_t d_maxCachedStacks{0};
+  int d_tid{0};
+  int d_maxtid{0};
  
    EventVal d_waitval;
-  enum waitstatusenum {Error=-1,TimeOut=0,Answer} d_waitstatus;
+  enum waitstatusenum : int8_t {Error=-1,TimeOut=0,Answer} d_waitstatus;
  
  public:
    struct Waiter
@@ -83,6 +87,7 @@ public:
      struct timeval ttd;
      int tid;
    };
+  struct KeyTag {};
  
    typedef multi_index_container<
      Waiter,
@@ -119,7 +124,7 @@ public:
        This limit applies solely to the stack, the heap is not limited in any way. If threads need to allocate a lot of data,
        the use of new/delete is suggested. 
     */
-  MTasker(size_t stacksize=16*8192) : d_stacksize(stacksize), d_threadsCount(0), d_tid(0), d_maxtid(0), d_waitstatus(Error)
+  MTasker(size_t stacksize=16*8192, size_t stackCacheSize=0) : d_stacksize(stacksize), d_maxCachedStacks(stackCacheSize), d_waitstatus(Error)
    {
      initMainStackBounds();
  
@@ -141,6 +146,8 @@ public:
    unsigned int getUsec();
  
  private:
+  std::shared_ptr<pdns_ucontext_t> getUContext();
+
    EventKey d_eventkey;   // for waitEvent, contains exact key it was awoken for
  };
  #include "mtasker.cc"
diff --git a/pdns/recursordist/docs/performance.rst b/pdns/recursordist/docs/performance.rst

index 12f00025aa21a196f8d50f169f53b4c04da62f11..6e5d24616181c8095292ab26dc1fa35651ccac6a 100644 (file)
--- a/pdns/recursordist/docs/performance.rst
+++ b/pdns/recursordist/docs/performance.rst
@@ -37,6 +37,17 @@ If ``SO_REUSEPORT`` support is available and :ref:`setting-reuseport` is set to
  .. versionadded:: 4.2.0
     The :ref:`setting-distributor-threads` parameter can be used to run more than one distributor thread.
  
+MTasker and MThreads
+--------------------
+
+PowerDNS Recursor uses a cooperative multitasking system in userspace called ``MTasker``, based either on ``boost::context`` if available, or on ``System V ucontexts`` otherwise. For maximum performance, please make sure that your system supports ``boost::context``, as the alternative has been known to be noticeably slower.
+
+The maximum number of simultaneous MTasker threads, called ``MThreads``, can be tuned via :ref:`setting-max-mthreads`, as the default value of 2048 might not be enough for large-scale installations.
+
+When an ``MThread`` is started, a new stack is dynamically allocated for it on the heap. The size of that stack can be configured via the :ref:`setting-stack-size` parameter, whose default value is 200 kB, which should be enough in most cases.
+
+To reduce the cost of allocating a new stack for every query, the recursor can cache a small number of stacks to make sure that the allocation stays cheap. This can be configured via the :ref:`setting-stack-cache-size` setting. The only trade-off of enabling this cache is a slightly increased memory consumption, at worst equal to the number of stacks specified by :ref:`setting-stack-cache-size` multiplied by the size of one stack, itself specified via :ref:`setting-stack-size`.
+
  Performance tips
  ----------------
  
diff --git a/pdns/recursordist/docs/settings.rst b/pdns/recursordist/docs/settings.rst

index aeace308a09abd0a04b3500e3fed049f27170fbf..31f0d43adff14e7e47c0e967f446f9ce82f2965c 100644 (file)
--- a/pdns/recursordist/docs/settings.rst
+++ b/pdns/recursordist/docs/settings.rst
@@ -1971,6 +1971,18 @@ Owner and group can be specified by name, mode is in octal.
  
  If set to non-zero, PowerDNS will assume it is being spoofed after seeing this many answers with the wrong id.
  
+.. _setting-stack-cache-size:
+
+``stack-cache-size``
+--------------------
+.. versionadded:: 4.9.0
+
+-  Integer
+-  Default: 100
+
+Maximum number of mthread stacks that can be cached for later reuse, per thread. Caching these stacks reduces the CPU load at the cost of a slightly higher memory usage, each cached stack consuming :ref:`setting-stack-size` bytes of memory.
+It makes no sense to cache more stacks than the value of :ref:`setting-max-mthreads`, since there will never be more stacks than that in use at a given time.
+
  .. _setting-stack-size:
  
  ``stack-size``
@@ -1978,7 +1990,7 @@ If set to non-zero, PowerDNS will assume it is being spoofed after seeing this m
  -  Integer
  -  Default: 200000
  
-Size of the stack of each mthread.
+Size in bytes of the stack of each mthread.
  
  .. _setting-statistics-interval:
  
diff --git a/pdns/recursordist/mtasker_context.hh b/pdns/recursordist/mtasker_context.hh

index e03a1709e42c6e959d61a8a6f70e48fb87015758..fe8c1757ca9fe8166654dc0ba16522ebfeaf63aa 100644 (file)
--- a/pdns/recursordist/mtasker_context.hh
+++ b/pdns/recursordist/mtasker_context.hh
@@ -54,7 +54,7 @@ extern __thread void* t_mainStack;
  extern __thread size_t t_mainStackSize;
  #endif /* HAVE_FIBER_SANITIZER */
  
-static inline void notifyStackSwitch(void* startOfStack, size_t stackSize)
+static inline void notifyStackSwitch(const void* startOfStack, size_t stackSize)
  {
  #ifdef HAVE_FIBER_SANITIZER
    __sanitizer_start_switch_fiber(nullptr, startOfStack, stackSize);
diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc

index b30f7fa26266e371781933ccbd3a9d93c61a0c81..ff87da509652e8f41cfce35fb1c652659e30c03b 100644 (file)
--- a/pdns/recursordist/rec-main.cc
+++ b/pdns/recursordist/rec-main.cc
@@ -2408,7 +2408,7 @@ static void recursorThread()
        t_bogusqueryring = std::make_unique<boost::circular_buffer<pair<DNSName, uint16_t>>>();
        t_bogusqueryring->set_capacity(ringsize);
      }
-    MT = std::make_unique<MT_t>(::arg().asNum("stack-size"));
+    MT = std::make_unique<MT_t>(::arg().asNum("stack-size"), ::arg().asNum("stack-cache-size"));
      threadInfo.mt = MT.get();
  
      /* start protobuf export threads if needed */
@@ -2601,6 +2601,7 @@ int main(int argc, char** argv)
  #else
      ::arg().set("stack-size", "stack size per mthread") = "200000";
  #endif
+    ::arg().set("stack-cache-size", "Size of the stack cache, per mthread") = "100";
      // This mode forces metrics snap updates and disable root-refresh, to get consistent counters
      ::arg().setSwitch("devonly-regression-test-mode", "internal use only") = "no";
      ::arg().set("soa-minimum-ttl", "Don't change") = "0";
author	Remi Gacogne <remi.gacogne@powerdns.com>
	Thu, 5 Jan 2023 11:10:46 +0000 (12:10 +0100)
committer	Remi Gacogne <remi.gacogne@powerdns.com>
	Thu, 5 Jan 2023 11:28:14 +0000 (12:28 +0100)
pdns/mtasker.cc		patch \| blob \| blame \| history
pdns/mtasker.hh		patch \| blob \| blame \| history
pdns/recursordist/docs/performance.rst		patch \| blob \| blame \| history
pdns/recursordist/docs/settings.rst		patch \| blob \| blame \| history
pdns/recursordist/mtasker_context.hh		patch \| blob \| blame \| history
pdns/recursordist/rec-main.cc		patch \| blob \| blame \| history