]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/strbuf.c
c2342e67a22bdc4dd430c5d36e8958fc49cbddc6
[thirdparty/systemd.git] / src / basic / strbuf.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <string.h>
4
5 #include "alloc-util.h"
6 #include "sort-util.h"
7 #include "strbuf.h"
8
9 /*
10 * Strbuf stores given strings in a single continuous allocated memory
11 * area. Identical strings are de-duplicated and return the same offset
12 * as the first string stored. If the tail of a string already exists
13 * in the buffer, the tail is returned.
14 *
15 * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
16 * information about the stored strings.
17 *
18 * Example of udev rules:
19 * $ ./udevadm test .
20 * ...
21 * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
22 * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
23 * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
24 * ...
25 */
26
27 struct strbuf* strbuf_new(void) {
28 _cleanup_(strbuf_freep) struct strbuf *str = NULL;
29
30 str = new(struct strbuf, 1);
31 if (!str)
32 return NULL;
33
34 *str = (struct strbuf) {
35 .buf = new0(char, 1),
36 .root = new0(struct strbuf_node, 1),
37 .len = 1,
38 .nodes_count = 1,
39 };
40 if (!str->buf || !str->root)
41 return NULL;
42
43 return TAKE_PTR(str);
44 }
45
46 static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) {
47 assert(node);
48
49 FOREACH_ARRAY(child, node->children, node->children_count)
50 strbuf_node_cleanup(child->child);
51
52 free(node->children);
53 return mfree(node);
54 }
55
56 /* clean up trie data, leave only the string buffer */
57 void strbuf_complete(struct strbuf *str) {
58 if (!str || !str->root)
59 return;
60
61 str->root = strbuf_node_cleanup(str->root);
62 }
63
64 /* clean up everything */
65 struct strbuf* strbuf_free(struct strbuf *str) {
66 if (!str)
67 return NULL;
68
69 strbuf_complete(str);
70 free(str->buf);
71 return mfree(str);
72 }
73
74 static int strbuf_children_cmp(const struct strbuf_child_entry *n1, const struct strbuf_child_entry *n2) {
75 assert(n1);
76 assert(n2);
77
78 return CMP(n1->c, n2->c);
79 }
80
81 static void bubbleinsert(struct strbuf_node *node,
82 uint8_t c,
83 struct strbuf_node *node_child) {
84
85 struct strbuf_child_entry new = {
86 .c = c,
87 .child = node_child,
88 };
89 int left = 0, right = node->children_count;
90
91 while (right > left) {
92 int middle = (right + left) / 2 ;
93 if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
94 left = middle + 1;
95 else
96 right = middle;
97 }
98
99 memmove(node->children + left + 1, node->children + left,
100 sizeof(struct strbuf_child_entry) * (node->children_count - left));
101 node->children[left] = new;
102
103 node->children_count++;
104 }
105
106 /* add string, return the index/offset into the buffer */
107 ssize_t strbuf_add_string_full(struct strbuf *str, const char *s, size_t len) {
108 uint8_t c;
109 ssize_t off;
110
111 assert(str);
112 assert(s || len == 0);
113
114 if (len == SIZE_MAX)
115 len = strlen(s);
116
117 if (!str->root)
118 return -EINVAL;
119
120 /* search string; start from last character to find possibly matching tails */
121
122 str->in_count++;
123 if (len == 0) {
124 str->dedup_count++;
125 return 0;
126 }
127 str->in_len += len;
128
129 struct strbuf_node *node = str->root;
130 for (size_t depth = 0; depth <= len; depth++) {
131 /* match against current node */
132 off = node->value_off + node->value_len - len;
133 if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
134 str->dedup_len += len;
135 str->dedup_count++;
136 return off;
137 }
138
139 c = s[len - 1 - depth];
140
141 /* lookup child node */
142 struct strbuf_child_entry *child, search = { .c = c };
143 child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp);
144 if (!child)
145 break;
146 node = child->child;
147 }
148
149 /* add new string */
150 if (!GREEDY_REALLOC(str->buf, str->len + len + 1))
151 return -ENOMEM;
152 off = str->len;
153 memcpy(str->buf + off, s, len);
154 str->len += len;
155 str->buf[str->len++] = '\0';
156
157 /* new node */
158 _cleanup_free_ struct strbuf_node *node_child = NULL;
159
160 node_child = new(struct strbuf_node, 1);
161 if (!node_child)
162 return -ENOMEM;
163 *node_child = (struct strbuf_node) {
164 .value_off = off,
165 .value_len = len,
166 };
167
168 /* extend array, add new entry, sort for bisection */
169 if (!GREEDY_REALLOC(node->children, node->children_count + 1))
170 return -ENOMEM;
171
172 str->nodes_count++;
173
174 bubbleinsert(node, c, TAKE_PTR(node_child));
175
176 return off;
177 }