From: Willy Tarreau Date: Sun, 26 Nov 2023 10:56:08 +0000 (+0100) Subject: OPTIM: pattern: save memory and time using ebst instead of ebis X-Git-Tag: v2.9-dev12~82 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3ac9912837118a81f3291e106ce9a7c4be6c934f;p=thirdparty%2Fhaproxy.git OPTIM: pattern: save memory and time using ebst instead of ebis In the pat_ref_elt struct, the pattern string is stored outside of the node element, using a pointer to an strdup(). Not only this needlessly wastes at least 16-24 bytes per entry (8 for the pointer, 8-16 for the allocator), it also makes the tree descent less efficient since both the node and the string have to be visited for each layer (hence at least two cache lines). Let's use an ebmb storage and place the pattern right at the end of the pat_ref_elt, making it a variable-sized element instead. The set-map test below jumps from 173 to 182 kreq/s/core, and the memory usage drops from 356 MB to 324 MB: http-request set-map(/dev/null) %[rand(1000000)] 1 This is even more visible with large maps: after loading 16M IP addresses into a map, the process uses this amount of memory: - 3.15 GB with haproxy-2.8 - 4.21 GB with haproxy-2.9-dev11 - 3.68 GB with this patch So that's a net saving of 32 bytes per entry here, which cuts in half the extra cost of the tree, and loading a large map takes about 20% less time. --- diff --git a/include/haproxy/pattern-t.h b/include/haproxy/pattern-t.h index 7d0699bf8d..6c1ba2407f 100644 --- a/include/haproxy/pattern-t.h +++ b/include/haproxy/pattern-t.h @@ -104,7 +104,7 @@ struct pat_ref { char *reference; /* The reference name. */ char *display; /* String displayed to identify the pattern origin. */ struct list head; /* The head of the list of struct pat_ref_elt. */ - struct eb_root ebpt_root; /* The tree where pattern reference elements are attached. */ + struct eb_root ebmb_root; /* The tree where pattern reference elements are attached. */ struct list pat; /* The head of the list of struct pattern_expr. */ unsigned int flags; /* flags PAT_REF_*. */ unsigned int curr_gen; /* current generation number (anything below can be removed) */ @@ -118,18 +118,18 @@ struct pat_ref { /* This is a part of struct pat_ref. Each entry contains one pattern and one * associated value as original string. All derivative forms (via exprs) are - * accessed from list_head or tree_head. + * accessed from list_head or tree_head. Be careful, it's variable-sized! */ struct pat_ref_elt { struct list list; /* Used to chain elements. */ - struct ebpt_node node; /* Node to attach this element to its ebtree. */ struct list back_refs; /* list of users tracking this pat ref */ void *list_head; /* all &pattern_list->from_ref derived from this reference, ends with NULL */ void *tree_head; /* all &pattern_tree->from_ref derived from this reference, ends with NULL */ - char *pattern; char *sample; unsigned int gen_id; /* generation of pat_ref this was made for */ int line; + struct ebmb_node node; /* Node to attach this element to its ebtree. */ + const char pattern[0]; // const only to make sure nobody tries to free it. }; /* This contain each tree indexed entry. This struct permit to associate diff --git a/src/pattern.c b/src/pattern.c index 0d3ed60a4d..1dc4f53258 100644 --- a/src/pattern.c +++ b/src/pattern.c @@ -1597,9 +1597,8 @@ void pat_ref_delete_by_ptr(struct pat_ref *ref, struct pat_ref_elt *elt) HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock); LIST_DELETE(&elt->list); - ebpt_delete(&elt->node); + ebmb_delete(&elt->node); free(elt->sample); - free(elt->pattern); free(elt); } @@ -1611,7 +1610,7 @@ int pat_ref_delete_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt) { int ret = !!refelt->node.node.leaf_p; - ebpt_delete(&refelt->node); + ebmb_delete(&refelt->node); return ret; } @@ -1622,16 +1621,16 @@ int pat_ref_delete_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt) */ int pat_ref_delete(struct pat_ref *ref, const char *key) { - struct ebpt_node *node; + struct ebmb_node *node; int found = 0; /* delete pattern from reference */ - node = ebis_lookup(&ref->ebpt_root, key); + node = ebst_lookup(&ref->ebmb_root, key); while (node) { struct pat_ref_elt *elt; - elt = ebpt_entry(node, struct pat_ref_elt, node); - node = ebpt_next_dup(node); + elt = ebmb_entry(node, struct pat_ref_elt, node); + node = ebmb_next_dup(node); pat_ref_delete_by_ptr(ref, elt); found = 1; } @@ -1645,11 +1644,11 @@ int pat_ref_delete(struct pat_ref *ref, const char *key) */ struct pat_ref_elt *pat_ref_find_elt(struct pat_ref *ref, const char *key) { - struct ebpt_node *node; + struct ebmb_node *node; - node = ebis_lookup(&ref->ebpt_root, key); + node = ebst_lookup(&ref->ebmb_root, key); if (node) - return ebpt_entry(node, struct pat_ref_elt, node); + return ebmb_entry(node, struct pat_ref_elt, node); return NULL; } @@ -1757,7 +1756,7 @@ int pat_ref_set(struct pat_ref *ref, const char *key, const char *value, char ** int found = 0; char *_merr; char **merr; - struct ebpt_node *node; + struct ebmb_node *node; if (err) { merr = &_merr; @@ -1771,12 +1770,12 @@ int pat_ref_set(struct pat_ref *ref, const char *key, const char *value, char ** } else { /* Look for pattern in the reference. */ - node = ebis_lookup(&ref->ebpt_root, key); + node = ebst_lookup(&ref->ebmb_root, key); } while (node) { - elt = ebpt_entry(node, struct pat_ref_elt, node); - node = ebpt_next_dup(node); + elt = ebmb_entry(node, struct pat_ref_elt, node); + node = ebmb_next_dup(node); if (!pat_ref_set_elt(ref, elt, value, merr)) { if (err && merr) { if (!found) { @@ -1832,7 +1831,7 @@ struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned ref->entry_cnt = 0; LIST_INIT(&ref->head); - ref->ebpt_root = EB_ROOT; + ref->ebmb_root = EB_ROOT; LIST_INIT(&ref->pat); HA_RWLOCK_INIT(&ref->lock); LIST_APPEND(&pattern_reference, &ref->list); @@ -1869,7 +1868,7 @@ struct pat_ref *pat_ref_newid(int unique_id, const char *display, unsigned int f ref->next_gen = 0; ref->unique_id = unique_id; LIST_INIT(&ref->head); - ref->ebpt_root = EB_ROOT; + ref->ebmb_root = EB_ROOT; LIST_INIT(&ref->pat); HA_RWLOCK_INIT(&ref->lock); LIST_APPEND(&pattern_reference, &ref->list); @@ -1885,17 +1884,16 @@ struct pat_ref *pat_ref_newid(int unique_id, const char *display, unsigned int f struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, const char *pattern, const char *sample, int line) { struct pat_ref_elt *elt; + int len = strlen(pattern); - elt = calloc(1, sizeof(*elt)); + elt = calloc(1, sizeof(*elt) + len + 1); if (!elt) goto fail; elt->gen_id = ref->curr_gen; elt->line = line; - elt->pattern = strdup(pattern); - if (!elt->pattern) - goto fail; + memcpy((char*)elt->pattern, pattern, len + 1); if (sample) { elt->sample = strdup(sample); @@ -1909,12 +1907,9 @@ struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, const char *pattern, con LIST_APPEND(&ref->head, &elt->list); /* Even if calloc()'ed, ensure this node is not linked to a tree. */ elt->node.node.leaf_p = NULL; - elt->node.key = elt->pattern; - ebis_insert(&ref->ebpt_root, &elt->node); + ebst_insert(&ref->ebmb_root, &elt->node); return elt; fail: - if (elt) - free(elt->pattern); free(elt); return NULL; } @@ -2081,8 +2076,7 @@ int pat_ref_purge_range(struct pat_ref *ref, uint from, uint to, int budget) pat_delete_gen(ref, elt); LIST_DELETE(&elt->list); - ebpt_delete(&elt->node); - free(elt->pattern); + ebmb_delete(&elt->node); free(elt->sample); free(elt); } @@ -2378,7 +2372,7 @@ int pattern_read_from_file(struct pattern_head *head, unsigned int refflags, { struct pat_ref *ref; struct pattern_expr *expr; - struct ebpt_node *node; + struct ebmb_node *node; struct pat_ref_elt *elt; int reuse = 0; @@ -2469,10 +2463,10 @@ int pattern_read_from_file(struct pattern_head *head, unsigned int refflags, return 1; /* Load reference content in the pattern expression. */ - node = ebpt_first(&ref->ebpt_root); + node = ebmb_first(&ref->ebmb_root); while (node) { - elt = ebpt_entry(node, struct pat_ref_elt, node); - node = ebpt_next(node); + elt = ebmb_entry(node, struct pat_ref_elt, node); + node = ebmb_next(node); if (!pat_ref_push(elt, expr, patflags, err)) { if (elt->line > 0) memprintf(err, "%s at line %d of file '%s'",