From dada048cbf575ccc3c70305392d28f29ca284680 Mon Sep 17 00:00:00 2001
From: Tobias Stoeckmann
Date: Wed, 16 Oct 2024 21:38:04 +0200
Subject: [PATCH] depmod: Write index in pre-order

Index files written in pre-order yield a significant performance
improvement for libkmod users. On an Arch Linux system, dumping the
configuration takes 296 read calls with a pre-order index, compared to
4080 with a post-order one. Tests on a Raspberry Pi 2 system have shown
an improvement of 9%.

Even writing is faster now, although we must know in advance how many
bytes index nodes will consume in the resulting file. The code
calculates these sizes on the fly and caches the results, and the saved
lseek system calls more than compensate for this extra overhead. On an
Arch Linux system, writing the index now takes 6143 lseek calls,
compared to the previous 83701, a performance gain of 13%.

Signed-off-by: Tobias Stoeckmann
Link: https://github.com/kmod-project/kmod/pull/188
Signed-off-by: Lucas De Marchi
---
 tools/depmod.c | 128 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 93 insertions(+), 35 deletions(-)

diff --git a/tools/depmod.c b/tools/depmod.c
index 3bb5a519..435917c0 100644
--- a/tools/depmod.c
+++ b/tools/depmod.c
@@ -138,6 +138,8 @@ struct index_node {
 	struct index_value *values;
 	unsigned char first; /* range of child nodes */
 	unsigned char last;
+	uint32_t size; /* size of node */
+	uint32_t total; /* size of node and its children */
 	struct index_node *children[INDEX_CHILDMAX]; /* indexed by character */
 };
 
@@ -323,26 +325,72 @@ static int index__haschildren(const struct index_node *node)
 	return node->first < INDEX_CHILDMAX;
 }
 
-/* Recursive post-order traversal
+static uint32_t index_get_mask(const struct index_node *node)
+{
+	uint32_t mask = 0;
+
+	if (index__haschildren(node))
+		mask |= INDEX_NODE_CHILDS;
+
+	if (node->prefix[0])
+		mask |= INDEX_NODE_PREFIX;
+
+	if (node->values)
+		mask |= INDEX_NODE_VALUES;
+
+	return mask;
+}
+
+static uint32_t index_calculate_size(struct index_node *node)
+{
+	node->size = node->total = 0;
+
+	if (index__haschildren(node)) {
+		int i;
+
+		node->size += 2; /* first + last, unsigned char */
+		node->size += (node->last - node->first + 1) * sizeof(uint32_t);
+
+		for (i = node->first; i <= node->last; i++) {
+			struct index_node *child = node->children[i];
+
+			if (child != NULL)
+				node->total += index_calculate_size(child);
+		}
+	}
+
+	if (node->prefix[0])
+		node->size += strlen(node->prefix) + 1;
+
+	if (node->values) {
+		const struct index_value *v;
+
+		node->size += sizeof(uint32_t);
+
+		for (v = node->values; v != NULL; v = v->next) {
+			node->size += sizeof(uint32_t);
+			node->size += strlen(v->value) + 1;
+		}
+	}
+
+	node->total += node->size;
 
-   Pre-order would make for better read-side buffering / readahead / caching.
-   (post-order means you go backwards in the file as you descend the tree).
-   However, index reading is already fast enough.
-   Pre-order is simpler for writing, and depmod is already slow.
+	return node->total;
+}
+
+/*
+ * Recursive pre-order traversal
+ *
+ * Returns total amount of bytes written, i.e.
+ * byte count of node and its children
  */
-static uint32_t index_write__node(const struct index_node *node, FILE *out)
+static uint32_t index_write__node(const struct index_node *node, FILE *out,
+				  uint32_t offset)
 {
 	uint32_t *child_offs = NULL;
 	int child_count = 0;
-	long offset;
 
-	if (!node)
-		return 0;
-
-	/* Write children and save their offsets */
+	/* Calculate children offsets */
 	if (index__haschildren(node)) {
-		const struct index_node *child;
 		int i;
+		size_t sizes = 0;
 
 		child_count = node->last - node->first + 1;
 		child_offs = malloc(child_count * sizeof(uint32_t));
@@ -350,25 +398,30 @@ static uint32_t index_write__node(const struct index_node *node, FILE *out)
 			fatal_oom();
 
 		for (i = 0; i < child_count; i++) {
+			struct index_node *child;
+
 			child = node->children[node->first + i];
-			child_offs[i] = htonl(index_write__node(child, out));
+			if (child == NULL) {
+				child_offs[i] = 0;
+			} else {
+				uint32_t mask = index_get_mask(child);
+				child_offs[i] =
+					htonl((offset + node->size + sizes) | mask);
+				sizes += child->total;
+			}
 		}
 	}
 
-	/* Now write this node */
-	offset = ftell(out);
-
+	/* Write this node */
 	if (node->prefix[0]) {
 		fputs(node->prefix, out);
 		fputc('\0', out);
-		offset |= INDEX_NODE_PREFIX;
 	}
 
 	if (child_count) {
 		fputc(node->first, out);
 		fputc(node->last, out);
 		fwrite(child_offs, sizeof(uint32_t), child_count, out);
-		offset |= INDEX_NODE_CHILDS;
 	}
 
 	free(child_offs);
@@ -390,37 +443,42 @@ static uint32_t index_write__node(const struct index_node *node, FILE *out)
 			fputs(v->value, out);
 			fputc('\0', out);
 		}
-		offset |= INDEX_NODE_VALUES;
 	}
 
-	return offset;
+	/* Now write children */
+	if (child_count) {
+		int i;
+
+		offset += node->size;
+		for (i = 0; i < child_count; i++) {
+			struct index_node *child = node->children[node->first + i];
+			if (child != NULL)
+				offset += index_write__node(child, out, offset);
+		}
+	}
+
+	return node->total;
 }
 
-static void index_write(const struct index_node *node, FILE *out)
+static void index_write(struct index_node *node, FILE *out)
 {
-	long initial_offset, final_offset;
+	/* First 3 words are magic, index, offset of node */
+	const uint32_t first_off = 3 * sizeof(uint32_t);
 	uint32_t u;
 
+	index_calculate_size(node);
+
 	u = htonl(INDEX_MAGIC);
 	fwrite(&u, sizeof(u), 1, out);
 	u = htonl(INDEX_VERSION);
 	fwrite(&u, sizeof(u), 1, out);
 
-	/* Second word is reserved for the offset of the root node */
-	initial_offset = ftell(out);
-	assert(initial_offset >= 0);
-	u = 0;
-	fwrite(&u, sizeof(uint32_t), 1, out);
+	/* Write offset of first node */
+	u = htonl(first_off | index_get_mask(node));
+	fwrite(&u, sizeof(u), 1, out);
 
 	/* Dump trie */
-	u = htonl(index_write__node(node, out));
-
-	/* Update first word */
-	final_offset = ftell(out);
-	assert(final_offset >= 0);
-	(void)fseek(out, initial_offset, SEEK_SET);
-	fwrite(&u, sizeof(uint32_t), 1, out);
-	(void)fseek(out, final_offset, SEEK_SET);
+	index_write__node(node, out, first_off);
 }
 
 /* configuration parsing **********************************************/
--
2.47.2
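
Note: the offset arithmetic in index_write__node can be hard to follow in
diff form. The sketch below is a minimal, self-contained illustration of the
same two-pass idea, not kmod's actual on-disk format: demo_node,
demo_calculate and demo_layout are hypothetical stand-ins, and the fixed
8-byte node size replaces the real per-node size computation. Pass 1 computes
each node's own size and subtree total; pass 2 can then place a child at
parent offset + parent size + the totals of the siblings laid out before it,
so the file is produced strictly front to back.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct demo_node {
	uint32_t size;	/* bytes this node itself occupies on disk */
	uint32_t total;	/* bytes of this node plus all of its children */
	int nchildren;
	struct demo_node **children;
};

/* Pass 1 (mirrors index_calculate_size): fill in size/total bottom-up. */
static uint32_t demo_calculate(struct demo_node *node)
{
	int i;

	node->size = 8;	/* pretend every node encodes to 8 bytes */
	node->total = 0;
	for (i = 0; i < node->nchildren; i++)
		node->total += demo_calculate(node->children[i]);
	node->total += node->size;
	return node->total;
}

/* Pass 2 (mirrors index_write__node): assign offsets in pre-order. */
static uint32_t demo_layout(const struct demo_node *node, uint32_t offset)
{
	uint32_t child_off = offset + node->size;
	int i;

	printf("node at offset %" PRIu32 " (size %" PRIu32 ", subtree %" PRIu32 ")\n",
	       offset, node->size, node->total);
	for (i = 0; i < node->nchildren; i++) {
		/* Children follow their parent; each sibling starts where
		 * the previous sibling's subtree ends. */
		demo_layout(node->children[i], child_off);
		child_off += node->children[i]->total;
	}
	return node->total;
}

int main(void)
{
	struct demo_node leaf1 = { 0, 0, 0, NULL };
	struct demo_node leaf2 = { 0, 0, 0, NULL };
	struct demo_node *kids[] = { &leaf1, &leaf2 };
	struct demo_node root = { 0, 0, 2, kids };

	demo_calculate(&root);
	demo_layout(&root, 3 * sizeof(uint32_t));	/* header: magic, version, root offset */
	return 0;
}

This is why index_calculate_size must run before index_write__node: without
node->total a parent could not know its children's offsets until after they
were written, which is what previously forced the ftell/fseek round trips.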