]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/strbuf.c
8b8281bb3b31d55927ce2619256ef26952101359
[thirdparty/systemd.git] / src / basic / strbuf.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2012 Kay Sievers <kay@vrfy.org>
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <stdlib.h>
23 #include <string.h>
24
25 #include "alloc-util.h"
26 #include "strbuf.h"
27 #include "util.h"
28
29 /*
30 * Strbuf stores given strings in a single continuous allocated memory
31 * area. Identical strings are de-duplicated and return the same offset
32 * as the first string stored. If the tail of a string already exists
33 * in the buffer, the tail is returned.
34 *
35 * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
36 * information about the stored strings.
37 *
38 * Example of udev rules:
39 * $ ./udevadm test .
40 * ...
41 * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
42 * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
43 * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
44 * ...
45 */
46
47 struct strbuf *strbuf_new(void) {
48 struct strbuf *str;
49
50 str = new(struct strbuf, 1);
51 if (!str)
52 return NULL;
53 *str = (struct strbuf) {
54 .buf = new0(char, 1),
55 .root = new0(struct strbuf_node, 1),
56 .len = 1,
57 .nodes_count = 1,
58 };
59 if (!str->buf || !str->root) {
60 free(str->buf);
61 free(str->root);
62 return mfree(str);
63 }
64
65 return str;
66 }
67
68 static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) {
69 size_t i;
70
71 for (i = 0; i < node->children_count; i++)
72 strbuf_node_cleanup(node->children[i].child);
73 free(node->children);
74 return mfree(node);
75 }
76
77 /* clean up trie data, leave only the string buffer */
78 void strbuf_complete(struct strbuf *str) {
79 if (!str)
80 return;
81 if (str->root)
82 str->root = strbuf_node_cleanup(str->root);
83 }
84
85 /* clean up everything */
86 void strbuf_cleanup(struct strbuf *str) {
87 strbuf_complete(str);
88 free(str->buf);
89 free(str);
90 }
91
92 static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
93 const struct strbuf_child_entry *n2) {
94 return n1->c - n2->c;
95 }
96
97 static void bubbleinsert(struct strbuf_node *node,
98 uint8_t c,
99 struct strbuf_node *node_child) {
100
101 struct strbuf_child_entry new = {
102 .c = c,
103 .child = node_child,
104 };
105 int left = 0, right = node->children_count;
106
107 while (right > left) {
108 int middle = (right + left) / 2 ;
109 if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
110 left = middle + 1;
111 else
112 right = middle;
113 }
114
115 memmove(node->children + left + 1, node->children + left,
116 sizeof(struct strbuf_child_entry) * (node->children_count - left));
117 node->children[left] = new;
118
119 node->children_count++;
120 }
121
122 /* add string, return the index/offset into the buffer */
123 ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
124 uint8_t c;
125 struct strbuf_node *node;
126 size_t depth;
127 char *buf_new;
128 struct strbuf_child_entry *child;
129 struct strbuf_node *node_child;
130 ssize_t off;
131
132 if (!str->root)
133 return -EINVAL;
134
135 /* search string; start from last character to find possibly matching tails */
136
137 str->in_count++;
138 if (len == 0) {
139 str->dedup_count++;
140 return 0;
141 }
142 str->in_len += len;
143
144 node = str->root;
145 for (depth = 0; depth <= len; depth++) {
146 struct strbuf_child_entry search;
147
148 /* match against current node */
149 off = node->value_off + node->value_len - len;
150 if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
151 str->dedup_len += len;
152 str->dedup_count++;
153 return off;
154 }
155
156 c = s[len - 1 - depth];
157
158 /* lookup child node */
159 search.c = c;
160 child = bsearch_safe(&search, node->children, node->children_count,
161 sizeof(struct strbuf_child_entry),
162 (__compar_fn_t) strbuf_children_cmp);
163 if (!child)
164 break;
165 node = child->child;
166 }
167
168 /* add new string */
169 buf_new = realloc(str->buf, str->len + len+1);
170 if (!buf_new)
171 return -ENOMEM;
172 str->buf = buf_new;
173 off = str->len;
174 memcpy(str->buf + off, s, len);
175 str->len += len;
176 str->buf[str->len++] = '\0';
177
178 /* new node */
179 node_child = new(struct strbuf_node, 1);
180 if (!node_child)
181 return -ENOMEM;
182 *node_child = (struct strbuf_node) {
183 .value_off = off,
184 .value_len = len,
185 };
186
187 /* extend array, add new entry, sort for bisection */
188 child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry));
189 if (!child) {
190 free(node_child);
191 return -ENOMEM;
192 }
193
194 str->nodes_count++;
195
196 node->children = child;
197 bubbleinsert(node, c, node_child);
198
199 return off;
200 }