From: alessio Date: Sun, 9 Mar 2025 08:13:16 +0000 (+0100) Subject: Adaptive memory allocation strategy for qp-tries X-Git-Tag: v9.21.9~41^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=70b1777d8aef75da1b184fe8155dc818ce66628a;p=thirdparty%2Fbind9.git Adaptive memory allocation strategy for qp-tries qp-tries allocate their nodes (twigs) in chunks to reduce allocator pressure and improve memory locality. The choice of chunk size presents a tradeoff: larger chunks benefit qp-tries with many values (as seen in large zones and resolvers) but waste memory in smaller use cases. Previously, our fixed chunk size of 2^10 twigs meant that even an empty qp-trie would consume 12KB of memory, while reducing this size would negatively impact resolver performance. This commit implements an adaptive chunking strategy that: - Tracks the size of the most recently allocated chunk. - Doubles the chunk size for each new allocation until reaching a predefined maximum. This approach effectively balances memory efficiency for small tries while maintaining the performance benefits of larger chunk sizes for bigger data structures. This commit also splits the callback freeing qpmultis into two phases, one that frees the underlying qptree, and one that reclaims the qpmulti memory. In order to prevent races between the qpmulti destructor and chunk garbage collection jobs, the second phase is protected by reference counting. --- diff --git a/lib/dns/include/dns/qp.h b/lib/dns/include/dns/qp.h index 16eca977d99..d72730c80ac 100644 --- a/lib/dns/include/dns/qp.h +++ b/lib/dns/include/dns/qp.h @@ -89,6 +89,12 @@ #include #include +/*% + * How many bytes a qp-trie might allocate as part of an insert. Needed for + * overmem checks. + */ +#define QP_SAFETY_MARGIN ((1ul << 12ul) * 12) + /*% * A `dns_qp_t` supports single-threaded read/write access. */ @@ -306,7 +312,6 @@ typedef struct dns_qp_memusage { size_t hold; /*%< nodes retained for readers */ size_t free; /*%< nodes to be reclaimed */ size_t node_size; /*%< in bytes */ - size_t chunk_size; /*%< nodes per chunk */ size_t chunk_count; /*%< allocated chunks */ size_t bytes; /*%< total memory in chunks and metadata */ bool fragmented; /*%< trie needs compaction */ diff --git a/lib/dns/qp.c b/lib/dns/qp.c index 79a0b8e6eeb..afed2a7fcf6 100644 --- a/lib/dns/qp.c +++ b/lib/dns/qp.c @@ -96,6 +96,19 @@ static atomic_uint_fast64_t rollback_time; #define TRACE(...) #endif +#if DNS_QPMULTI_TRACE +ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti); +#define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_qpmulti_unref(ptr) \ + dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_qpmulti_attach(ptr, ptrp) \ + dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_qpmulti_detach(ptrp) \ + dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__) +#else +ISC_REFCOUNT_STATIC_DECL(dns_qpmulti); +#endif + /*********************************************************************** * * converting DNS names to trie keys @@ -425,7 +438,7 @@ write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) { #else -#define chunk_get_raw(qp) isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES) +#define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size) #define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr) #define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size) @@ -454,6 +467,22 @@ cells_immutable(dns_qp_t *qp, dns_qpref_t ref) { } } +/* + * Find the next power that is both bigger than size and prev_capacity, + * but still within the chunk min and max sizes. + */ +static dns_qpcell_t +next_capacity(uint32_t prev_capacity, uint32_t size) { + /* + * Unfortunately builtin_clz is undefined for 0. We work around this + * issue by flooring the request size at 2. + */ + size = ISC_MAX3(size, prev_capacity, 2u); + uint32_t log2 = 32u - __builtin_clz(size - 1u); + + return 1U << ISC_CLAMP(log2, QP_CHUNK_LOG_MIN, QP_CHUNK_LOG_MAX); +} + /* * Create a fresh bump chunk and allocate some twigs from it. */ @@ -462,9 +491,15 @@ chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) { INSIST(qp->base->ptr[chunk] == NULL); INSIST(qp->usage[chunk].used == 0); INSIST(qp->usage[chunk].free == 0); + INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE); - qp->base->ptr[chunk] = chunk_get_raw(qp); - qp->usage[chunk] = (qp_usage_t){ .exists = true, .used = size }; + qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size); + qp->base->ptr[chunk] = + chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t)); + + qp->usage[chunk] = (qp_usage_t){ .exists = true, + .used = size, + .capacity = qp->chunk_capacity }; qp->used_count += size; qp->bump = chunk; qp->fender = 0; @@ -544,7 +579,7 @@ alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) { dns_qpchunk_t chunk = qp->bump; dns_qpcell_t cell = qp->usage[chunk].used; - if (cell + size <= QP_CHUNK_SIZE) { + if (cell + size <= qp->usage[chunk].capacity) { qp->usage[chunk].used += size; qp->used_count += size; return make_ref(chunk, cell); @@ -697,38 +732,47 @@ reclaim_chunks_cb(struct rcu_head *arg) { REQUIRE(QPMULTI_VALID(multi)); LOCK(&multi->mutex); - dns_qp_t *qp = &multi->writer; - REQUIRE(QP_VALID(qp)); - - unsigned int free = 0; - isc_nanosecs_t start = isc_time_monotonic(); - for (unsigned int i = 0; i < rcuctx->count; i++) { - dns_qpchunk_t chunk = rcuctx->chunk[i]; - if (qp->usage[chunk].snapshot) { - /* cleanup when snapshot is destroyed */ - qp->usage[chunk].snapfree = true; - } else { - chunk_free(qp, chunk); - free++; - } - } + /* + * If chunk_max is zero, chunks have already been freed. + */ + if (qp->chunk_max != 0) { + unsigned int free = 0; + isc_nanosecs_t start = isc_time_monotonic(); - isc_mem_putanddetach(&rcuctx->mctx, rcuctx, - STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); + INSIST(QP_VALID(qp)); - isc_nanosecs_t time = isc_time_monotonic() - start; - recycle_time += time; + for (unsigned int i = 0; i < rcuctx->count; i++) { + dns_qpchunk_t chunk = rcuctx->chunk[i]; + if (qp->usage[chunk].snapshot) { + /* clean up when snapshot is destroyed */ + qp->usage[chunk].snapfree = true; + } else { + chunk_free(qp, chunk); + free++; + } + } - if (free > 0) { - LOG_STATS("qp reclaim" PRItime "free %u chunks", time, free); - LOG_STATS("qp reclaim leaf %u live %u used %u free %u hold %u", - qp->leaf_count, qp->used_count - qp->free_count, - qp->used_count, qp->free_count, qp->hold_count); + isc_nanosecs_t time = isc_time_monotonic() - start; + recycle_time += time; + + if (free > 0) { + LOG_STATS("qp reclaim" PRItime "free %u chunks", time, + free); + LOG_STATS( + "qp reclaim leaf %u live %u used %u free %u " + "hold %u", + qp->leaf_count, qp->used_count - qp->free_count, + qp->used_count, qp->free_count, qp->hold_count); + } } UNLOCK(&multi->mutex); + + dns_qpmulti_detach(&multi); + isc_mem_putanddetach(&rcuctx->mctx, rcuctx, + STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); } /* @@ -773,6 +817,11 @@ reclaim_chunks(dns_qpmulti_t *multi) { } } + /* + * Reference the qpmulti object to keep it from being + * freed until reclaim_chunks_cb() runs. + */ + dns_qpmulti_ref(multi); call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb); LOG_STATS("qp will reclaim %u chunks", count); @@ -1023,12 +1072,13 @@ dns_qp_memusage(dns_qp_t *qp) { .hold = qp->hold_count, .free = qp->free_count, .node_size = sizeof(dns_qpnode_t), - .chunk_size = QP_CHUNK_SIZE, .fragmented = QP_NEEDGC(qp), }; + size_t chunk_usage_bytes = 0; for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { if (qp->base->ptr[chunk] != NULL) { + chunk_usage_bytes += qp->usage[chunk].capacity; memusage.chunk_count += 1; } } @@ -1037,7 +1087,7 @@ dns_qp_memusage(dns_qp_t *qp) { * XXXFANF does not subtract chunks that have been shrunk, * and does not count unreclaimed dns_qpbase_t objects */ - memusage.bytes = memusage.chunk_count * QP_CHUNK_BYTES + + memusage.bytes = chunk_usage_bytes + qp->chunk_max * sizeof(qp->base->ptr[0]) + qp->chunk_max * sizeof(qp->usage[0]); @@ -1055,7 +1105,7 @@ dns_qpmulti_memusage(dns_qpmulti_t *multi) { dns_qp_memusage_t memusage = dns_qp_memusage(qp); if (qp->transaction_mode == QP_UPDATE) { - memusage.bytes -= QP_CHUNK_BYTES; + memusage.bytes -= qp->usage[qp->bump].capacity; memusage.bytes += qp->usage[qp->bump].used * sizeof(dns_qpnode_t); } @@ -1440,12 +1490,12 @@ dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx, REQUIRE(qpmp != NULL && *qpmp == NULL); dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi)); - *multi = (dns_qpmulti_t){ - .magic = QPMULTI_MAGIC, - .reader_ref = INVALID_REF, - }; + *multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC, + .reader_ref = INVALID_REF, + .references = ISC_REFCOUNT_INITIALIZER(1) }; isc_mutex_init(&multi->mutex); ISC_LIST_INIT(multi->snapshots); + /* * Do not waste effort allocating a bump chunk that will be thrown * away when a transaction is opened. dns_qpmulti_update() always @@ -1465,11 +1515,13 @@ destroy_guts(dns_qp_t *qp) { if (qp->chunk_max == 0) { return; } + for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { if (qp->base->ptr[chunk] != NULL) { chunk_free(qp, chunk); } } + qp->chunk_max = 0; ENSURE(qp->used_count == 0); ENSURE(qp->free_count == 0); ENSURE(isc_refcount_current(&qp->base->refcount) == 1); @@ -1495,7 +1547,26 @@ dns_qp_destroy(dns_qp_t **qptp) { } static void -qpmulti_destroy_cb(struct rcu_head *arg) { +qpmulti_free_mem(dns_qpmulti_t *multi) { + REQUIRE(QPMULTI_VALID(multi)); + + /* reassure thread sanitizer */ + LOCK(&multi->mutex); + dns_qp_t *qp = &multi->writer; + UNLOCK(&multi->mutex); + + isc_mutex_destroy(&multi->mutex); + isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi)); +} + +#if QPMULTI_TRACE +ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem) +#else +ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem) +#endif + +static void +qpmulti_destroy_guts_cb(struct rcu_head *arg) { qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head); REQUIRE(QPRCU_VALID(rcuctx)); /* only nonzero for reclaim_chunks_cb() */ @@ -1514,10 +1585,9 @@ qpmulti_destroy_cb(struct rcu_head *arg) { UNLOCK(&multi->mutex); - isc_mutex_destroy(&multi->mutex); + dns_qpmulti_detach(&multi); isc_mem_putanddetach(&rcuctx->mctx, rcuctx, STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); - isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi)); } void @@ -1543,7 +1613,7 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp) { .multi = multi, }; isc_mem_attach(qp->mctx, &rcuctx->mctx); - call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb); + call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb); } /*********************************************************************** diff --git a/lib/dns/qp_p.h b/lib/dns/qp_p.h index 0206c5fd56b..add3bdeaeeb 100644 --- a/lib/dns/qp_p.h +++ b/lib/dns/qp_p.h @@ -19,6 +19,8 @@ #pragma once +#include + #include /*********************************************************************** @@ -141,22 +143,29 @@ enum { * size to make the allocator work harder. */ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -#define QP_CHUNK_LOG 7 +#define QP_CHUNK_LOG_MIN 6 +#define QP_CHUNK_LOG_MAX 7 #else -#define QP_CHUNK_LOG 10 +#define QP_CHUNK_LOG_MIN 6 +#define QP_CHUNK_LOG_MAX 10 #endif -STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20, - "qp-trie chunk size is unreasonable"); +STATIC_ASSERT(6 <= QP_CHUNK_LOG_MIN && QP_CHUNK_LOG_MIN <= QP_CHUNK_LOG_MAX, + "qp-trie min chunk size is unreasonable"); +STATIC_ASSERT(6 <= QP_CHUNK_LOG_MAX && QP_CHUNK_LOG_MAX <= 20, + "qp-trie max chunk size is unreasonable"); -#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG) +#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG_MAX) #define QP_CHUNK_BYTES (QP_CHUNK_SIZE * sizeof(dns_qpnode_t)) +STATIC_ASSERT(QP_SAFETY_MARGIN >= QP_CHUNK_BYTES, + "qp-trie safety margin too small"); + /* * We need a bitfield this big to count how much of a chunk is in use: - * it needs to count from 0 up to and including `1 << QP_CHUNK_LOG`. + * it needs to count from 0 up to and including `1 << QP_CHUNK_LOG_MAX`. */ -#define QP_USAGE_BITS (QP_CHUNK_LOG + 1) +#define QP_USAGE_BITS (QP_CHUNK_LOG_MAX + 1) /* * A chunk needs to be compacted if it is less full than this threshold. @@ -268,6 +277,8 @@ ref_cell(dns_qpref_t ref) { typedef struct qp_usage { /*% the allocation point, increases monotonically */ dns_qpcell_t used : QP_USAGE_BITS; + /*% the actual size of the allocation */ + dns_qpcell_t capacity : QP_USAGE_BITS; /*% count of nodes no longer needed, also monotonic */ dns_qpcell_t free : QP_USAGE_BITS; /*% qp->base->ptr[chunk] != NULL */ @@ -322,6 +333,7 @@ typedef struct qp_rcuctx { struct rcu_head rcu_head; isc_mem_t *mctx; dns_qpmulti_t *multi; + ISC_LINK(struct qp_rcuctx) link; dns_qpchunk_t count; dns_qpchunk_t chunk[]; } qp_rcuctx_t; @@ -479,6 +491,8 @@ struct dns_qp { dns_qpcell_t used_count, free_count; /*% free cells that cannot be recovered right now */ dns_qpcell_t hold_count; + /*% capacity of last allocated chunk, for exponential chunk growth */ + dns_qpcell_t chunk_capacity; /*% what kind of transaction was most recently started [MT] */ enum { QP_NONE, QP_WRITE, QP_UPDATE } transaction_mode : 2; /*% compact the entire trie [MT] */ @@ -523,6 +537,8 @@ struct dns_qpmulti { dns_qp_t *rollback; /*% all snapshots of this trie */ ISC_LIST(dns_qpsnap_t) snapshots; + /*% refcount for memory reclamation */ + isc_refcount_t references; }; /*********************************************************************** diff --git a/lib/dns/qpcache.c b/lib/dns/qpcache.c index 032dc47a1ae..40b7b206c7d 100644 --- a/lib/dns/qpcache.c +++ b/lib/dns/qpcache.c @@ -519,7 +519,7 @@ qpcache_miss(qpcache_t *qpdb, dns_slabheader_t *newheader, size_t purgesize = 2 * (sizeof(qpcnode_t) + dns_name_size(&HEADERNODE(newheader)->name)) + - rdataset_size(newheader) + 12288; + rdataset_size(newheader) + QP_SAFETY_MARGIN; expire_lru_headers(qpdb, idx, purgesize, nlocktypep, tlocktypep DNS__DB_FLARG_PASS); diff --git a/lib/isc/include/isc/util.h b/lib/isc/include/isc/util.h index 9fed22df60c..971f21dc1ee 100644 --- a/lib/isc/include/isc/util.h +++ b/lib/isc/include/isc/util.h @@ -69,6 +69,8 @@ #define ISC_CLAMP(v, x, y) ((v) < (x) ? (x) : ((v) > (y) ? (y) : (v))) +#define ISC_MAX3(a, b, c) ISC_MAX(ISC_MAX((a), (b)), (c)) + /*% * The UNCONST() macro can be used to omit warnings produced by certain * compilers when operating with pointers declared with the const type qual- diff --git a/lib/isc/mem.c b/lib/isc/mem.c index ff34485ae0e..c1b0ab7331c 100644 --- a/lib/isc/mem.c +++ b/lib/isc/mem.c @@ -434,9 +434,10 @@ isc__mem_initialize(void) { void isc__mem_shutdown(void) { - /* should be called after an rcu_barrier() */ bool empty; + rcu_barrier(); + isc__mem_checkdestroyed(); LOCK(&contextslock);