]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/strbuf.c
1b08c9c2d35d4a65b0ffff64ea65d4154ba5877d
[thirdparty/systemd.git] / src / basic / strbuf.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2012 Kay Sievers <kay@vrfy.org>
6 ***/
7
8 #include <errno.h>
9 #include <stdlib.h>
10 #include <string.h>
11
12 #include "alloc-util.h"
13 #include "strbuf.h"
14 #include "util.h"
15
16 /*
17 * Strbuf stores given strings in a single continuous allocated memory
18 * area. Identical strings are de-duplicated and return the same offset
19 * as the first string stored. If the tail of a string already exists
20 * in the buffer, the tail is returned.
21 *
22 * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
23 * information about the stored strings.
24 *
25 * Example of udev rules:
26 * $ ./udevadm test .
27 * ...
28 * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
29 * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
30 * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
31 * ...
32 */
33
34 struct strbuf *strbuf_new(void) {
35 struct strbuf *str;
36
37 str = new(struct strbuf, 1);
38 if (!str)
39 return NULL;
40 *str = (struct strbuf) {
41 .buf = new0(char, 1),
42 .root = new0(struct strbuf_node, 1),
43 .len = 1,
44 .nodes_count = 1,
45 };
46 if (!str->buf || !str->root) {
47 free(str->buf);
48 free(str->root);
49 return mfree(str);
50 }
51
52 return str;
53 }
54
55 static struct strbuf_node* strbuf_node_cleanup(struct strbuf_node *node) {
56 size_t i;
57
58 for (i = 0; i < node->children_count; i++)
59 strbuf_node_cleanup(node->children[i].child);
60 free(node->children);
61 return mfree(node);
62 }
63
64 /* clean up trie data, leave only the string buffer */
65 void strbuf_complete(struct strbuf *str) {
66 if (!str)
67 return;
68 if (str->root)
69 str->root = strbuf_node_cleanup(str->root);
70 }
71
72 /* clean up everything */
73 void strbuf_cleanup(struct strbuf *str) {
74 strbuf_complete(str);
75 free(str->buf);
76 free(str);
77 }
78
79 static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
80 const struct strbuf_child_entry *n2) {
81 return n1->c - n2->c;
82 }
83
84 static void bubbleinsert(struct strbuf_node *node,
85 uint8_t c,
86 struct strbuf_node *node_child) {
87
88 struct strbuf_child_entry new = {
89 .c = c,
90 .child = node_child,
91 };
92 int left = 0, right = node->children_count;
93
94 while (right > left) {
95 int middle = (right + left) / 2 ;
96 if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
97 left = middle + 1;
98 else
99 right = middle;
100 }
101
102 memmove(node->children + left + 1, node->children + left,
103 sizeof(struct strbuf_child_entry) * (node->children_count - left));
104 node->children[left] = new;
105
106 node->children_count++;
107 }
108
109 /* add string, return the index/offset into the buffer */
110 ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
111 uint8_t c;
112 struct strbuf_node *node;
113 size_t depth;
114 char *buf_new;
115 struct strbuf_child_entry *child;
116 struct strbuf_node *node_child;
117 ssize_t off;
118
119 if (!str->root)
120 return -EINVAL;
121
122 /* search string; start from last character to find possibly matching tails */
123
124 str->in_count++;
125 if (len == 0) {
126 str->dedup_count++;
127 return 0;
128 }
129 str->in_len += len;
130
131 node = str->root;
132 for (depth = 0; depth <= len; depth++) {
133 struct strbuf_child_entry search;
134
135 /* match against current node */
136 off = node->value_off + node->value_len - len;
137 if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
138 str->dedup_len += len;
139 str->dedup_count++;
140 return off;
141 }
142
143 c = s[len - 1 - depth];
144
145 /* lookup child node */
146 search.c = c;
147 child = bsearch_safe(&search, node->children, node->children_count,
148 sizeof(struct strbuf_child_entry),
149 (__compar_fn_t) strbuf_children_cmp);
150 if (!child)
151 break;
152 node = child->child;
153 }
154
155 /* add new string */
156 buf_new = realloc(str->buf, str->len + len+1);
157 if (!buf_new)
158 return -ENOMEM;
159 str->buf = buf_new;
160 off = str->len;
161 memcpy(str->buf + off, s, len);
162 str->len += len;
163 str->buf[str->len++] = '\0';
164
165 /* new node */
166 node_child = new(struct strbuf_node, 1);
167 if (!node_child)
168 return -ENOMEM;
169 *node_child = (struct strbuf_node) {
170 .value_off = off,
171 .value_len = len,
172 };
173
174 /* extend array, add new entry, sort for bisection */
175 child = reallocarray(node->children, node->children_count + 1, sizeof(struct strbuf_child_entry));
176 if (!child) {
177 free(node_child);
178 return -ENOMEM;
179 }
180
181 str->nodes_count++;
182
183 node->children = child;
184 bubbleinsert(node, c, node_child);
185
186 return off;
187 }