]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/strbuf.c
Merge pull request #1691 from poettering/util-lib-3
[thirdparty/systemd.git] / src / basic / strbuf.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2012 Kay Sievers <kay@vrfy.org>
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdlib.h>
23 #include <string.h>
24
25 #include "alloc-util.h"
26 #include "strbuf.h"
27 #include "util.h"
28
29 /*
30 * Strbuf stores given strings in a single continuous allocated memory
31 * area. Identical strings are de-duplicated and return the same offset
32 * as the first string stored. If the tail of a string already exists
33 * in the buffer, the tail is returned.
34 *
35 * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
36 * information about the stored strings.
37 *
38 * Example of udev rules:
39 * $ ./udevadm test .
40 * ...
41 * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
42 * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
43 * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
44 * ...
45 */
46
47 struct strbuf *strbuf_new(void) {
48 struct strbuf *str;
49
50 str = new0(struct strbuf, 1);
51 if (!str)
52 return NULL;
53
54 str->buf = new0(char, 1);
55 if (!str->buf)
56 goto err;
57 str->len = 1;
58
59 str->root = new0(struct strbuf_node, 1);
60 if (!str->root)
61 goto err;
62 str->nodes_count = 1;
63 return str;
64 err:
65 free(str->buf);
66 free(str->root);
67 free(str);
68 return NULL;
69 }
70
71 static void strbuf_node_cleanup(struct strbuf_node *node) {
72 size_t i;
73
74 for (i = 0; i < node->children_count; i++)
75 strbuf_node_cleanup(node->children[i].child);
76 free(node->children);
77 free(node);
78 }
79
80 /* clean up trie data, leave only the string buffer */
81 void strbuf_complete(struct strbuf *str) {
82 if (!str)
83 return;
84 if (str->root)
85 strbuf_node_cleanup(str->root);
86 str->root = NULL;
87 }
88
89 /* clean up everything */
90 void strbuf_cleanup(struct strbuf *str) {
91 if (!str)
92 return;
93 if (str->root)
94 strbuf_node_cleanup(str->root);
95 free(str->buf);
96 free(str);
97 }
98
99 static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
100 const struct strbuf_child_entry *n2) {
101 return n1->c - n2->c;
102 }
103
104 static void bubbleinsert(struct strbuf_node *node,
105 uint8_t c,
106 struct strbuf_node *node_child) {
107
108 struct strbuf_child_entry new = {
109 .c = c,
110 .child = node_child,
111 };
112 int left = 0, right = node->children_count;
113
114 while (right > left) {
115 int middle = (right + left) / 2 ;
116 if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
117 left = middle + 1;
118 else
119 right = middle;
120 }
121
122 memmove(node->children + left + 1, node->children + left,
123 sizeof(struct strbuf_child_entry) * (node->children_count - left));
124 node->children[left] = new;
125
126 node->children_count ++;
127 }
128
129 /* add string, return the index/offset into the buffer */
130 ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
131 uint8_t c;
132 struct strbuf_node *node;
133 size_t depth;
134 char *buf_new;
135 struct strbuf_child_entry *child;
136 struct strbuf_node *node_child;
137 ssize_t off;
138
139 if (!str->root)
140 return -EINVAL;
141
142 /* search string; start from last character to find possibly matching tails */
143 if (len == 0)
144 return 0;
145 str->in_count++;
146 str->in_len += len;
147
148 node = str->root;
149 c = s[len-1];
150 for (depth = 0; depth <= len; depth++) {
151 struct strbuf_child_entry search;
152
153 /* match against current node */
154 off = node->value_off + node->value_len - len;
155 if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
156 str->dedup_len += len;
157 str->dedup_count++;
158 return off;
159 }
160
161 /* lookup child node */
162 c = s[len - 1 - depth];
163 search.c = c;
164 child = bsearch(&search, node->children, node->children_count,
165 sizeof(struct strbuf_child_entry),
166 (__compar_fn_t) strbuf_children_cmp);
167 if (!child)
168 break;
169 node = child->child;
170 }
171
172 /* add new string */
173 buf_new = realloc(str->buf, str->len + len+1);
174 if (!buf_new)
175 return -ENOMEM;
176 str->buf = buf_new;
177 off = str->len;
178 memcpy(str->buf + off, s, len);
179 str->len += len;
180 str->buf[str->len++] = '\0';
181
182 /* new node */
183 node_child = new0(struct strbuf_node, 1);
184 if (!node_child)
185 return -ENOMEM;
186 node_child->value_off = off;
187 node_child->value_len = len;
188
189 /* extend array, add new entry, sort for bisection */
190 child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry));
191 if (!child) {
192 free(node_child);
193 return -ENOMEM;
194 }
195
196 str->nodes_count++;
197
198 node->children = child;
199 bubbleinsert(node, c, node_child);
200
201 return off;
202 }