]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/strbuf.c
Merge pull request #2727 from ian-kelling/man-pr-v3
[thirdparty/systemd.git] / src / basic / strbuf.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2012 Kay Sievers <kay@vrfy.org>
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "alloc-util.h"
25 #include "strbuf.h"
26
27 /*
28 * Strbuf stores given strings in a single contiguous allocated memory
29 * area. Identical strings are de-duplicated and return the same offset
30 * as the first string stored. If a string is already present as the tail
31 * of a stored string, the offset of that tail is returned instead.
32 *
33 * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
34 * information about the stored strings.
35 *
36 * Example of udev rules:
37 * $ ./udevadm test .
38 * ...
39 * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
40 * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
41 * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
42 * ...
43 */
44
45 struct strbuf *strbuf_new(void) {
46 struct strbuf *str;
47
48 str = new0(struct strbuf, 1);
49 if (!str)
50 return NULL;
51
52 str->buf = new0(char, 1);
53 if (!str->buf)
54 goto err;
55 str->len = 1;
56
57 str->root = new0(struct strbuf_node, 1);
58 if (!str->root)
59 goto err;
60 str->nodes_count = 1;
61 return str;
62 err:
63 free(str->buf);
64 free(str->root);
65 free(str);
66 return NULL;
67 }
68
69 static void strbuf_node_cleanup(struct strbuf_node *node) {
70 size_t i;
71
72 for (i = 0; i < node->children_count; i++)
73 strbuf_node_cleanup(node->children[i].child);
74 free(node->children);
75 free(node);
76 }
77
78 /* clean up trie data, leave only the string buffer */
79 void strbuf_complete(struct strbuf *str) {
80 if (!str)
81 return;
82 if (str->root)
83 strbuf_node_cleanup(str->root);
84 str->root = NULL;
85 }
86
87 /* clean up everything */
88 void strbuf_cleanup(struct strbuf *str) {
89 if (!str)
90 return;
91 if (str->root)
92 strbuf_node_cleanup(str->root);
93 free(str->buf);
94 free(str);
95 }
96
97 static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
98 const struct strbuf_child_entry *n2) {
99 return n1->c - n2->c;
100 }
101
102 static void bubbleinsert(struct strbuf_node *node,
103 uint8_t c,
104 struct strbuf_node *node_child) {
105
106 struct strbuf_child_entry new = {
107 .c = c,
108 .child = node_child,
109 };
110 int left = 0, right = node->children_count;
111
112 while (right > left) {
113 int middle = (right + left) / 2 ;
114 if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
115 left = middle + 1;
116 else
117 right = middle;
118 }
119
120 memmove(node->children + left + 1, node->children + left,
121 sizeof(struct strbuf_child_entry) * (node->children_count - left));
122 node->children[left] = new;
123
124 node->children_count++;
125 }
126
127 /* add string, return the index/offset into the buffer */
128 ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
129 uint8_t c;
130 struct strbuf_node *node;
131 size_t depth;
132 char *buf_new;
133 struct strbuf_child_entry *child;
134 struct strbuf_node *node_child;
135 ssize_t off;
136
137 if (!str->root)
138 return -EINVAL;
139
140 /* search string; start from last character to find possibly matching tails */
141 if (len == 0)
142 return 0;
143 str->in_count++;
144 str->in_len += len;
145
146 node = str->root;
147 c = s[len-1];
148 for (depth = 0; depth <= len; depth++) {
149 struct strbuf_child_entry search;
150
151 /* match against current node */
152 off = node->value_off + node->value_len - len;
153 if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
154 str->dedup_len += len;
155 str->dedup_count++;
156 return off;
157 }
158
159 /* bsearch is not allowed on a NULL sequence */
160 if (node->children_count == 0)
161 break;
162
163 /* lookup child node */
164 c = s[len - 1 - depth];
165 search.c = c;
166 child = bsearch(&search, node->children, node->children_count,
167 sizeof(struct strbuf_child_entry),
168 (__compar_fn_t) strbuf_children_cmp);
169 if (!child)
170 break;
171 node = child->child;
172 }
173
174 /* add new string */
175 buf_new = realloc(str->buf, str->len + len+1);
176 if (!buf_new)
177 return -ENOMEM;
178 str->buf = buf_new;
179 off = str->len;
180 memcpy(str->buf + off, s, len);
181 str->len += len;
182 str->buf[str->len++] = '\0';
183
184 /* new node */
185 node_child = new0(struct strbuf_node, 1);
186 if (!node_child)
187 return -ENOMEM;
188 node_child->value_off = off;
189 node_child->value_len = len;
190
191 /* extend array, add new entry, sort for bisection */
192 child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry));
193 if (!child) {
194 free(node_child);
195 return -ENOMEM;
196 }
197
198 str->nodes_count++;
199
200 node->children = child;
201 bubbleinsert(node, c, node_child);
202
203 return off;
204 }