]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/strbuf.c
basic/strbuf: include empty strings in count
[thirdparty/systemd.git] / src / basic / strbuf.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
955bd501
KS
2/***
3 This file is part of systemd.
4
1298001e 5 Copyright 2012 Kay Sievers <kay@vrfy.org>
955bd501
KS
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
11c3a366 21#include <errno.h>
955bd501
KS
22#include <stdlib.h>
23#include <string.h>
24
b5efdb8a 25#include "alloc-util.h"
955bd501 26#include "strbuf.h"
d6c5d19b 27#include "util.h"
955bd501 28
4693cfb3 29/*
ab06eef8 30 * Strbuf stores given strings in a single continuous allocated memory
f1c0ece1
KS
31 * area. Identical strings are de-duplicated and return the same offset
32 * as the first string stored. If the tail of a string already exists
33 * in the buffer, the tail is returned.
4693cfb3 34 *
f1c0ece1
KS
35 * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
36 * information about the stored strings.
4693cfb3
KS
37 *
38 * Example of udev rules:
39 * $ ./udevadm test .
40 * ...
41 * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
42 * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
43 * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
44 * ...
45 */
46
955bd501
KS
47struct strbuf *strbuf_new(void) {
48 struct strbuf *str;
49
2fb076ad 50 str = new(struct strbuf, 1);
955bd501
KS
51 if (!str)
52 return NULL;
2fb076ad
ZJS
53 *str = (struct strbuf) {
54 .buf = new0(char, 1),
55 .root = new0(struct strbuf_node, 1),
56 .len = 1,
57 .nodes_count = 1,
58 };
59 if (!str->buf || !str->root) {
60 free(str->buf);
61 free(str->root);
62 return mfree(str);
63 }
955bd501 64
955bd501 65 return str;
955bd501
KS
66}
67
2fb076ad 68static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) {
955bd501
KS
69 size_t i;
70
71 for (i = 0; i < node->children_count; i++)
72 strbuf_node_cleanup(node->children[i].child);
73 free(node->children);
2fb076ad 74 return mfree(node);
955bd501
KS
75}
76
4693cfb3 77/* clean up trie data, leave only the string buffer */
955bd501
KS
78void strbuf_complete(struct strbuf *str) {
79 if (!str)
80 return;
81 if (str->root)
2fb076ad 82 str->root = strbuf_node_cleanup(str->root);
955bd501
KS
83}
84
4693cfb3 85/* clean up everything */
955bd501 86void strbuf_cleanup(struct strbuf *str) {
2fb076ad 87 strbuf_complete(str);
955bd501
KS
88 free(str->buf);
89 free(str);
90}
91
3c8bed4e
ZJS
92static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
93 const struct strbuf_child_entry *n2) {
955bd501
KS
94 return n1->c - n2->c;
95}
96
3c8bed4e
ZJS
97static void bubbleinsert(struct strbuf_node *node,
98 uint8_t c,
99 struct strbuf_node *node_child) {
100
101 struct strbuf_child_entry new = {
102 .c = c,
103 .child = node_child,
104 };
105 int left = 0, right = node->children_count;
106
107 while (right > left) {
108 int middle = (right + left) / 2 ;
109 if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
110 left = middle + 1;
111 else
112 right = middle;
113 }
114
115 memmove(node->children + left + 1, node->children + left,
116 sizeof(struct strbuf_child_entry) * (node->children_count - left));
117 node->children[left] = new;
118
313cefa1 119 node->children_count++;
3c8bed4e
ZJS
120}
121
4693cfb3 122/* add string, return the index/offset into the buffer */
955bd501
KS
123ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
124 uint8_t c;
125 struct strbuf_node *node;
126 size_t depth;
127 char *buf_new;
128 struct strbuf_child_entry *child;
129 struct strbuf_node *node_child;
130 ssize_t off;
131
132 if (!str->root)
133 return -EINVAL;
134
135 /* search string; start from last character to find possibly matching tails */
435ce146 136
955bd501 137 str->in_count++;
435ce146
ZJS
138 if (len == 0) {
139 str->dedup_count++;
140 return 0;
141 }
955bd501
KS
142 str->in_len += len;
143
144 node = str->root;
955bd501
KS
145 for (depth = 0; depth <= len; depth++) {
146 struct strbuf_child_entry search;
147
148 /* match against current node */
149 off = node->value_off + node->value_len - len;
150 if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
151 str->dedup_len += len;
152 str->dedup_count++;
153 return off;
154 }
155
c45606eb
LR
156 c = s[len - 1 - depth];
157
955bd501 158 /* lookup child node */
955bd501 159 search.c = c;
d6c5d19b
ZJS
160 child = bsearch_safe(&search, node->children, node->children_count,
161 sizeof(struct strbuf_child_entry),
162 (__compar_fn_t) strbuf_children_cmp);
955bd501
KS
163 if (!child)
164 break;
165 node = child->child;
166 }
167
168 /* add new string */
169 buf_new = realloc(str->buf, str->len + len+1);
170 if (!buf_new)
171 return -ENOMEM;
172 str->buf = buf_new;
173 off = str->len;
174 memcpy(str->buf + off, s, len);
175 str->len += len;
176 str->buf[str->len++] = '\0';
177
178 /* new node */
2fb076ad 179 node_child = new(struct strbuf_node, 1);
955bd501
KS
180 if (!node_child)
181 return -ENOMEM;
2fb076ad
ZJS
182 *node_child = (struct strbuf_node) {
183 .value_off = off,
184 .value_len = len,
185 };
955bd501
KS
186
187 /* extend array, add new entry, sort for bisection */
62d74c78 188 child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry));
a9c307e5
ZJS
189 if (!child) {
190 free(node_child);
955bd501 191 return -ENOMEM;
a9c307e5
ZJS
192 }
193
194 str->nodes_count++;
195
955bd501 196 node->children = child;
3c8bed4e 197 bubbleinsert(node, c, node_child);
955bd501
KS
198
199 return off;
200}