]>
Commit | Line | Data |
---|---|---|
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ | |
2 | ||
3 | #include <string.h> | |
4 | ||
5 | #include "alloc-util.h" | |
6 | #include "sort-util.h" | |
7 | #include "strbuf.h" | |
8 | ||
9 | /* | |
10 | * Strbuf stores given strings in a single continuous allocated memory | |
11 | * area. Identical strings are de-duplicated and return the same offset | |
12 | * as the first string stored. If the tail of a string already exists | |
13 | * in the buffer, the tail is returned. | |
14 | * | |
15 | * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the | |
16 | * information about the stored strings. | |
17 | * | |
18 | * Example of udev rules: | |
19 | * $ ./udevadm test . | |
20 | * ... | |
21 | * read rules file: /usr/lib/udev/rules.d/99-systemd.rules | |
22 | * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings | |
23 | * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used | |
24 | * ... | |
25 | */ | |
26 | ||
27 | struct strbuf* strbuf_new(void) { | |
28 | _cleanup_(strbuf_freep) struct strbuf *str = NULL; | |
29 | ||
30 | str = new(struct strbuf, 1); | |
31 | if (!str) | |
32 | return NULL; | |
33 | ||
34 | *str = (struct strbuf) { | |
35 | .buf = new0(char, 1), | |
36 | .root = new0(struct strbuf_node, 1), | |
37 | .len = 1, | |
38 | .nodes_count = 1, | |
39 | }; | |
40 | if (!str->buf || !str->root) | |
41 | return NULL; | |
42 | ||
43 | return TAKE_PTR(str); | |
44 | } | |
45 | ||
46 | static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) { | |
47 | assert(node); | |
48 | ||
49 | FOREACH_ARRAY(child, node->children, node->children_count) | |
50 | strbuf_node_cleanup(child->child); | |
51 | ||
52 | free(node->children); | |
53 | return mfree(node); | |
54 | } | |
55 | ||
56 | /* clean up trie data, leave only the string buffer */ | |
57 | void strbuf_complete(struct strbuf *str) { | |
58 | if (!str || !str->root) | |
59 | return; | |
60 | ||
61 | str->root = strbuf_node_cleanup(str->root); | |
62 | } | |
63 | ||
64 | /* clean up everything */ | |
65 | struct strbuf* strbuf_free(struct strbuf *str) { | |
66 | if (!str) | |
67 | return NULL; | |
68 | ||
69 | strbuf_complete(str); | |
70 | free(str->buf); | |
71 | return mfree(str); | |
72 | } | |
73 | ||
74 | static int strbuf_children_cmp(const struct strbuf_child_entry *n1, const struct strbuf_child_entry *n2) { | |
75 | assert(n1); | |
76 | assert(n2); | |
77 | ||
78 | return CMP(n1->c, n2->c); | |
79 | } | |
80 | ||
81 | static void bubbleinsert(struct strbuf_node *node, | |
82 | uint8_t c, | |
83 | struct strbuf_node *node_child) { | |
84 | ||
85 | struct strbuf_child_entry new = { | |
86 | .c = c, | |
87 | .child = node_child, | |
88 | }; | |
89 | int left = 0, right = node->children_count; | |
90 | ||
91 | while (right > left) { | |
92 | int middle = (right + left) / 2 ; | |
93 | if (strbuf_children_cmp(&node->children[middle], &new) <= 0) | |
94 | left = middle + 1; | |
95 | else | |
96 | right = middle; | |
97 | } | |
98 | ||
99 | memmove(node->children + left + 1, node->children + left, | |
100 | sizeof(struct strbuf_child_entry) * (node->children_count - left)); | |
101 | node->children[left] = new; | |
102 | ||
103 | node->children_count++; | |
104 | } | |
105 | ||
106 | /* add string, return the index/offset into the buffer */ | |
107 | ssize_t strbuf_add_string_full(struct strbuf *str, const char *s, size_t len) { | |
108 | uint8_t c; | |
109 | ssize_t off; | |
110 | ||
111 | assert(str); | |
112 | assert(s || len == 0); | |
113 | ||
114 | if (len == SIZE_MAX) | |
115 | len = strlen(s); | |
116 | ||
117 | if (!str->root) | |
118 | return -EINVAL; | |
119 | ||
120 | /* search string; start from last character to find possibly matching tails */ | |
121 | ||
122 | str->in_count++; | |
123 | if (len == 0) { | |
124 | str->dedup_count++; | |
125 | return 0; | |
126 | } | |
127 | str->in_len += len; | |
128 | ||
129 | struct strbuf_node *node = str->root; | |
130 | for (size_t depth = 0; depth <= len; depth++) { | |
131 | /* match against current node */ | |
132 | off = node->value_off + node->value_len - len; | |
133 | if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) { | |
134 | str->dedup_len += len; | |
135 | str->dedup_count++; | |
136 | return off; | |
137 | } | |
138 | ||
139 | c = s[len - 1 - depth]; | |
140 | ||
141 | /* lookup child node */ | |
142 | struct strbuf_child_entry *child, search = { .c = c }; | |
143 | child = typesafe_bsearch(&search, node->children, node->children_count, strbuf_children_cmp); | |
144 | if (!child) | |
145 | break; | |
146 | node = child->child; | |
147 | } | |
148 | ||
149 | /* add new string */ | |
150 | if (!GREEDY_REALLOC(str->buf, str->len + len + 1)) | |
151 | return -ENOMEM; | |
152 | off = str->len; | |
153 | memcpy(str->buf + off, s, len); | |
154 | str->len += len; | |
155 | str->buf[str->len++] = '\0'; | |
156 | ||
157 | /* new node */ | |
158 | _cleanup_free_ struct strbuf_node *node_child = NULL; | |
159 | ||
160 | node_child = new(struct strbuf_node, 1); | |
161 | if (!node_child) | |
162 | return -ENOMEM; | |
163 | *node_child = (struct strbuf_node) { | |
164 | .value_off = off, | |
165 | .value_len = len, | |
166 | }; | |
167 | ||
168 | /* extend array, add new entry, sort for bisection */ | |
169 | if (!GREEDY_REALLOC(node->children, node->children_count + 1)) | |
170 | return -ENOMEM; | |
171 | ||
172 | str->nodes_count++; | |
173 | ||
174 | bubbleinsert(node, c, TAKE_PTR(node_child)); | |
175 | ||
176 | return off; | |
177 | } |