]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Allocate DNS send buffers using dedicated per-worker memory arenas
authorArtem Boldariev <artem@boldariev.com>
Thu, 10 Aug 2023 20:08:25 +0000 (23:08 +0300)
committerOndřej Surý <ondrej@isc.org>
Tue, 5 Sep 2023 07:39:41 +0000 (09:39 +0200)
This commit ensures that memory allocations related to DNS send
buffers are routed through dedicated per-worker memory arenas in order
to decrease memory usage on high load caused by TCP-based DNS
transports.

We do that by following jemalloc developers suggestions:

https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1639019699
https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1698173849

lib/ns/client.c
lib/ns/include/ns/client.h
lib/ns/server.c

index f13ff04c7669be5f6f550a6f4b45f5362fd01c9e..58d5c4862489c94dda0f9b03b54d2eb72cc41c01 100644 (file)
@@ -334,7 +334,7 @@ client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer,
 
        if (TCP_CLIENT(client)) {
                INSIST(client->tcpbuf == NULL);
-               client->tcpbuf = isc_mem_get(client->manager->mctx,
+               client->tcpbuf = isc_mem_get(client->manager->send_mctx,
                                             NS_CLIENT_TCP_BUFFER_SIZE);
                client->tcpbuf_size = NS_CLIENT_TCP_BUFFER_SIZE;
                data = client->tcpbuf;
@@ -371,9 +371,9 @@ client_sendpkg(ns_client_t *client, isc_buffer_t *buffer) {
 
        if (isc_buffer_base(buffer) == client->tcpbuf) {
                size_t used = isc_buffer_usedlength(buffer);
-               client->tcpbuf =
-                       isc_mem_creget(client->manager->mctx, client->tcpbuf,
-                                      client->tcpbuf_size, used, sizeof(char));
+               client->tcpbuf = isc_mem_creget(
+                       client->manager->send_mctx, client->tcpbuf,
+                       client->tcpbuf_size, used, sizeof(char));
                client->tcpbuf_size = used;
                r.base = client->tcpbuf;
                r.length = used;
@@ -449,7 +449,7 @@ ns_client_sendraw(ns_client_t *client, dns_message_t *message) {
        return;
 done:
        if (client->tcpbuf != NULL) {
-               isc_mem_put(client->manager->mctx, client->tcpbuf,
+               isc_mem_put(client->manager->send_mctx, client->tcpbuf,
                            client->tcpbuf_size);
        }
 
@@ -733,7 +733,7 @@ renderend:
 
 cleanup:
        if (client->tcpbuf != NULL) {
-               isc_mem_put(client->manager->mctx, client->tcpbuf,
+               isc_mem_put(client->manager->send_mctx, client->tcpbuf,
                            client->tcpbuf_size);
        }
 
@@ -1621,7 +1621,7 @@ ns__client_reset_cb(void *client0) {
 
        ns_client_endrequest(client);
        if (client->tcpbuf != NULL) {
-               isc_mem_put(client->manager->mctx, client->tcpbuf,
+               isc_mem_put(client->manager->send_mctx, client->tcpbuf,
                            client->tcpbuf_size);
        }
 
@@ -1658,7 +1658,8 @@ ns__client_put_cb(void *client0) {
 
        client->magic = 0;
 
-       isc_mem_put(manager->mctx, client->sendbuf, NS_CLIENT_SEND_BUFFER_SIZE);
+       isc_mem_put(manager->send_mctx, client->sendbuf,
+                   NS_CLIENT_SEND_BUFFER_SIZE);
        if (client->opt != NULL) {
                INSIST(dns_rdataset_isassociated(client->opt));
                dns_rdataset_disassociate(client->opt);
@@ -2308,7 +2309,7 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) {
                dns_message_create(client->manager->mctx,
                                   DNS_MESSAGE_INTENTPARSE, &client->message);
 
-               client->sendbuf = isc_mem_get(client->manager->mctx,
+               client->sendbuf = isc_mem_get(client->manager->send_mctx,
                                              NS_CLIENT_SEND_BUFFER_SIZE);
                /*
                 * Set magic earlier than usual because ns_query_init()
@@ -2356,7 +2357,7 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) {
 
 cleanup:
        if (client->sendbuf != NULL) {
-               isc_mem_put(client->manager->mctx, client->sendbuf,
+               isc_mem_put(client->manager->send_mctx, client->sendbuf,
                            NS_CLIENT_SEND_BUFFER_SIZE);
        }
 
@@ -2392,6 +2393,8 @@ clientmgr_destroy_cb(void *arg) {
 
        ns_server_detach(&manager->sctx);
 
+       isc_mem_detach(&manager->send_mctx);
+
        isc_mem_putanddetach(&manager->mctx, manager, sizeof(*manager));
 }
 
@@ -2424,6 +2427,61 @@ ns_clientmgr_create(ns_server_t *sctx, isc_loopmgr_t *loopmgr,
        isc_refcount_init(&manager->references, 1);
        ns_server_attach(sctx, &manager->sctx);
 
+       /*
+        * We create specialised per-worker memory context specifically
+        * dedicated and tuned for allocating send buffers as it is a very
+        * common operation. Not doing so may result in excessive memory
+        * use in certain workloads.
+        *
+        * Please see this thread for more details:
+        *
+        * https://github.com/jemalloc/jemalloc/issues/2483
+        *
+        * In particular, this information from the jemalloc developers is
+        * of the most interest:
+        *
+        * https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1639019699
+        * https://github.com/jemalloc/jemalloc/issues/2483#issuecomment-1698173849
+        *
+        * In essence, we use the following memory management strategy:
+        *
+        * 1. We use a per-worker memory arena for send buffers memory
+        * allocation to reduce lock contention (In reality, we create a
+        * per-client manager arena, but we have one client manager per
+        * worker).
+        *
+        * 2. The automatically created arenas settings remain unchanged
+        * and may be controlled by users (e.g. by setting the
+        * "MALLOC_CONF" variable).
+        *
+        * 3. We attune the arenas to not use dirty pages cache as the
+        * cache would have a poor reuse rate, and that is known to
+        * significantly contribute to excessive memory use.
+        *
+        * 4. There is no strict need for the dirty cache, as there is a
+        * per arena bin for each allocation size, so because we initially
+        * allocate strictly 64K per send buffer (enough for a DNS
+        * message), allocations would get directed to one bin (an "object
+        * pool" or a "slab") maintained within an arena. That is, there
+        * is an object pool already, specifically to optimise for the
+        * case of frequent allocations of objects of the given size. The
+        * object pool should suffice our needs, as we will end up
+        * recycling the objects from there without the need to back it by
+        * an additional layer of dirty pages cache. The dirty pages cache
+        * would have worked better in the case when there are more
+        * allocation bins involved due to a higher reuse rate (the case
+        * of a more "generic" memory management).
+        */
+       isc_mem_create_arena(&manager->send_mctx);
+       isc_mem_setname(manager->send_mctx, "sendbufs");
+       (void)isc_mem_arena_set_dirty_decay_ms(manager->send_mctx, 0);
+       /*
+        * Disable muzzy pages cache too, as versions < 5.2.0 have it
+        * enabled by default. The muzzy pages cache goes right below the
+        * dirty pages cache and backs it.
+        */
+       (void)isc_mem_arena_set_muzzy_decay_ms(manager->send_mctx, 0);
+
        manager->magic = MANAGER_MAGIC;
 
        MTRACE("create");
index 111e107d582b95e8cd2f3ff4391edf4c989cddc7..43cd72d1c8576a5a4f34469ec04c1e82621eb69e 100644 (file)
@@ -144,6 +144,7 @@ struct ns_clientmgr {
        unsigned int magic;
 
        isc_mem_t     *mctx;
+       isc_mem_t     *send_mctx;
        ns_server_t   *sctx;
        isc_refcount_t references;
        uint32_t       tid;
index bd17a9baaa6ad106ee487e8eb0fdf751a211fad7..d853848a7639be2d580942cde6a97572fcbb7dd2 100644 (file)
@@ -55,6 +55,11 @@ ns_server_create(isc_mem_t *mctx, ns_matchview_t matchingview,
 
        isc_mem_attach(mctx, &sctx->mctx);
 
+       /*
+        * See here for more details:
+        * https://github.com/jemalloc/jemalloc/issues/2483
+        */
+
        isc_refcount_init(&sctx->references, 1);
 
        isc_quota_init(&sctx->xfroutquota, 10);