return 1;
}
+template<class Key, class Val, class Cmp> std::shared_ptr<pdns_ucontext_t> MTasker<Key,Val,Cmp>::getUContext()
+{
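+  // reuse a cached stack when one is available, otherwise allocate a fresh one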
+  auto uc = std::make_shared<pdns_ucontext_t>();
+  if (d_cachedStacks.empty()) {
+    uc->uc_stack.resize(d_stacksize + 1);
+  }
+  else {
+    uc->uc_stack = std::move(d_cachedStacks.top());
+    d_cachedStacks.pop();
+  }
+
+  uc->uc_link = &d_kernel; // come back to kernel after dying
+
+#ifdef PDNS_USE_VALGRIND
+  uc->valgrind_id = VALGRIND_STACK_REGISTER(&uc->uc_stack[0],
+                                            &uc->uc_stack[uc->uc_stack.size()-1]);
+#endif /* PDNS_USE_VALGRIND */
+
+  return uc;
+}
+
//! launches a new thread
/** The kernel can call this to make a new thread, which starts at the function start and gets passed the val void pointer.
\param start Pointer to the function which will form the start of the thread
*/
template<class Key, class Val, class Cmp>void MTasker<Key,Val,Cmp>::makeThread(tfunc_t *start, void* val)
{
-  auto uc=std::make_shared<pdns_ucontext_t>();
-
-  uc->uc_link = &d_kernel; // come back to kernel after dying
-  uc->uc_stack.resize (d_stacksize+1);
-#ifdef PDNS_USE_VALGRIND
-  uc->valgrind_id = VALGRIND_STACK_REGISTER(&uc->uc_stack[0],
-                                            &uc->uc_stack[uc->uc_stack.size()-1]);
-#endif /* PDNS_USE_VALGRIND */
+  auto uc = getUContext();
++d_threadsCount;
auto& thread = d_threads[d_maxtid];
d_runQueue.pop();
return true;
}
-  if(!d_zombiesQueue.empty()) {
-    d_threads.erase(d_zombiesQueue.front());
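+  // when reaping an exited mthread, move its stack into the cache (if there is room) so a future mthread can reuse it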
+  if (!d_zombiesQueue.empty()) {
+    auto zombi = d_zombiesQueue.front();
+    if (d_cachedStacks.size() < d_maxCachedStacks) {
+      auto thread = d_threads.find(zombi);
+      if (thread != d_threads.end()) {
+        d_cachedStacks.push(std::move(thread->second.context->uc_stack));
+        d_threads.erase(thread);
+      }
+    }
+    else {
+      d_threads.erase(zombi);
+    }
--d_threadsCount;
d_zombiesQueue.pop();
return true;
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#pragma once
-#include <stdint.h>
+#include <cstdint>
+#include <ctime>
#include <queue>
-#include <vector>
#include <map>
-#include <time.h>
+#include <memory>
+#include <stack>
+#include <vector>
+
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/key_extractors.hpp>
#include "namespaces.hh"
#include "misc.hh"
#include "mtasker_context.hh"
-#include <memory>
using namespace ::boost::multi_index;
// #define MTASKERTIMING 1
-struct KeyTag {};
-
//! The main MTasker class
/** The main MTasker class. See the main page for more information.
\tparam EventKey Type of the key with which events are to be identified. Defaults to int.
{
std::shared_ptr<pdns_ucontext_t> context;
std::function<void(void)> start;
- char* startOfStack;
- char* highestStackSeen;
+ const char* startOfStack;
+ const char* highestStackSeen;
#ifdef MTASKERTIMING
CPUTime dt;
unsigned int totTime;
#endif
};
- typedef std::map<int, ThreadInfo> mthreads_t;
+ using pdns_mtasker_stack_t = std::vector<char, lazy_allocator<char>>;
+ using mthreads_t = std::map<int, ThreadInfo>;
+
mthreads_t d_threads;
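+ // stacks of exited mthreads, kept around so new mthreads can reuse them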
+ std::stack<pdns_mtasker_stack_t> d_cachedStacks;
size_t d_stacksize;
- size_t d_threadsCount;
- int d_tid;
- int d_maxtid;
+ size_t d_threadsCount{0};
+ size_t d_maxCachedStacks{0};
+ int d_tid{0};
+ int d_maxtid{0};
EventVal d_waitval;
- enum waitstatusenum {Error=-1,TimeOut=0,Answer} d_waitstatus;
+ enum waitstatusenum : int8_t {Error=-1,TimeOut=0,Answer} d_waitstatus;
public:
struct Waiter
struct timeval ttd;
int tid;
};
+ struct KeyTag {};
typedef multi_index_container<
Waiter,
This limit applies solely to the stack, the heap is not limited in any way. If threads need to allocate a lot of data,
the use of new/delete is suggested.
*/
- MTasker(size_t stacksize=16*8192) : d_stacksize(stacksize), d_threadsCount(0), d_tid(0), d_maxtid(0), d_waitstatus(Error)
+ MTasker(size_t stacksize=16*8192, size_t stackCacheSize=0) : d_stacksize(stacksize), d_maxCachedStacks(stackCacheSize), d_waitstatus(Error)
{
initMainStackBounds();
unsigned int getUsec();
private:
+ std::shared_ptr<pdns_ucontext_t> getUContext();
+
EventKey d_eventkey; // for waitEvent, contains exact key it was awoken for
};
#include "mtasker.cc"
.. versionadded:: 4.2.0
The :ref:`setting-distributor-threads` parameter can be used to run more than one distributor thread.
+MTasker and MThreads
+--------------------
+
+PowerDNS Recursor uses cooperative multitasking in userspace, provided by a component called ``MTasker`` and based either on ``boost::context`` if available, or on System V ucontexts otherwise. For maximum performance, please make sure that your system supports ``boost::context``, as the ucontext-based alternative is known to be noticeably slower.
+
+The maximum number of simultaneous MTasker threads, called ``MThreads``, can be tuned via :ref:`setting-max-mthreads`, as the default value of 2048 might not be enough for large-scale installations.
+
+When an ``MThread`` is started, a new stack is dynamically allocated for it on the heap. The size of that stack can be configured via the :ref:`setting-stack-size` parameter, whose default value of 200 kB should be enough in most cases.
+
+To reduce the cost of allocating a new stack for every query, the recursor can cache a small number of stacks so that allocation stays cheap. This is configured via the :ref:`setting-stack-cache-size` setting. The only trade-off of enabling this cache is a slightly increased memory consumption, at worst equal to the number of stacks specified by :ref:`setting-stack-cache-size` multiplied by the size of one stack, itself specified via :ref:`setting-stack-size`.
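+
+As an illustration, a ``recursor.conf`` fragment for a large installation could combine these settings as follows; the values below are purely illustrative and should be sized to your own load:
+
+.. code-block:: none
+
+    # allow more simultaneous mthreads than the default of 2048
+    max-mthreads=4096
+    # 200 kB of stack per mthread (the default)
+    stack-size=200000
+    # keep up to 100 stacks per worker thread around for reuse
+    stack-cache-size=100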
+
Performance tips
----------------
If set to non-zero, PowerDNS will assume it is being spoofed after seeing this many answers with the wrong id.
+.. _setting-stack-cache-size:
+
+``stack-cache-size``
+--------------------
+.. versionadded:: 4.9.0
+
+- Integer
+- Default: 100
+
+Maximum number of mthread stacks that can be cached for later reuse, per thread. Caching these stacks reduces the CPU load at the cost of a slightly higher memory usage, each cached stack consuming :ref:`setting-stack-size` bytes of memory.
+It makes no sense to cache more stacks than the value of :ref:`setting-max-mthreads`, since there will never be more stacks than that in use at a given time.
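+For example, with the default ``stack-size`` of 200000 bytes, the default of 100 cached stacks amounts to at most roughly 20 MB of memory per thread.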
+
.. _setting-stack-size:
``stack-size``
- Integer
- Default: 200000
-Size of the stack of each mthread.
+Size in bytes of the stack of each mthread.
.. _setting-statistics-interval:
extern __thread size_t t_mainStackSize;
#endif /* HAVE_FIBER_SANITIZER */
-static inline void notifyStackSwitch(void* startOfStack, size_t stackSize)
+static inline void notifyStackSwitch(const void* startOfStack, size_t stackSize)
{
#ifdef HAVE_FIBER_SANITIZER
__sanitizer_start_switch_fiber(nullptr, startOfStack, stackSize);
t_bogusqueryring = std::make_unique<boost::circular_buffer<pair<DNSName, uint16_t>>>();
t_bogusqueryring->set_capacity(ringsize);
}
- MT = std::make_unique<MT_t>(::arg().asNum("stack-size"));
+ MT = std::make_unique<MT_t>(::arg().asNum("stack-size"), ::arg().asNum("stack-cache-size"));
threadInfo.mt = MT.get();
/* start protobuf export threads if needed */
#else
::arg().set("stack-size", "stack size per mthread") = "200000";
#endif
+ ::arg().set("stack-cache-size", "Size of the stack cache, per mthread") = "100";
// This mode forces metrics snap updates and disable root-refresh, to get consistent counters
::arg().setSwitch("devonly-regression-test-mode", "internal use only") = "no";
::arg().set("soa-minimum-ttl", "Don't change") = "0";